g | x | w | all
Bytes Lang Time Link
069JavaScript Node.js240402T200428Zl4m2
034Uiua250719T024631Znyxbird
091Turing Machine Code250718T235046ZErikDaPa
028Japt mh250718T201327ZShaggy
037Vyxal 3250718T172601Zpacman25
095Swift 6240401T160543ZmacOSist
nan240402T112335ZRARE Kpo
098PowerShell Core230319T194032ZJulian
070Perl 5 pF240401T192409ZXcali
106Easyfuck240330T224928ZQuadrupl
042HP 41C/CV/CX 77 bytes150908T131823Zdb2
037GBZ80 machine code210131T061127ZEasyasPi
316brainfuck210131T000940Zengineer
217Forth gforth170620T140706Z2xsaiko
076JavaScript ES6150908T212348Zedc65
104Python 2.7.3161214T084752ZStefan
119C#161214T081507ZStefan
089Perl 6161214T072517Zbb94
073Julia150907T172141ZAlex A.
177Java 8150907T184849Zhyperneu
133Python 3150907T143714ZBeta Dec
294HPPPL150909T225158ZM L
079Python 2150907T195600Zdaniero
189PHP150909T190813ZEinacio
057K150909T162157ZJohnE
096C150907T192153Zr3mainer
1705RPL150909T044432ZJason
194R150908T130217Zmawir
104JavaScript150908T062133ZAndrew V
8173Ruby150907T173622Zdaniero
115Rust150907T144307ZDoorknob
090Javascript ES6150907T162920ZETHprodu
344Lua150908T003434ZDelya Er
290Ceylon150907T194810ZPaŭlo Eb
093Python 3150907T162649ZRohcana
127Python 3150907T181734Zuno20001
111Rust150907T184026Zuser4867
035Pyth150907T142746Zizzyg
037CJam150907T151433ZDennis
089Haskell150907T170904Znimi
193C#150907T155643ZStephan
197STATA150907T152631Zbmarks
139Crystal150907T150257Zkirbyfan

JavaScript (Node.js), 69 bytes

p=>[...p].map(t=>p=255&[~p,p*2,p/2,p*257/16]['!<>@'.indexOf(t)]||p)|p

Try it online!

-3 bytes Shaggy

Uiua, 34 bytes

°⋯˜∧⨬(¬|⬚0↻1|⬚0⌝↻1|↻4|∘)⊚8˜⊗"!><@"

Try it!

˜⊗"!><@" # get the index of each char within "!><@"
⊚8       # push an array of 8 zeroes
˜∧⨬(     # fold across the string, and switch on the indexes:
  ¬      # 0: not
 |⬚0↻1   # 1: rotate left 1, filling with zeroes
 |⬚0⌝↻1  # 2: rotate right 1, filling with zeroes
 |↻4     # 3: rotate left 4
 |∘)      # 4/out-of-bounds: do nothing
°⋯       # convert from binary

The ↻ rotates are flipped because Uiua's binary conversion is backwards (lsb-first).

Turing Machine Code, 91 rules

0 * * l SU    ; setup the 8 0s
SU _ _ l SU8
SU8 _ 0 l SU7
SU7 _ 0 l SU6
SU6 _ 0 l SU5
SU5 _ 0 l SU4
SU4 _ 0 l SU3
SU3 _ 0 l SU2
SU2 _ 0 l SU1
SU1 _ 0 r SU0
SU0 * * r SU0
SU0 _ _ r P ; start reading operators when finished

P ! x l gIN ; if the char is not a no-op, execute.
P < x l gSL
P > x l gSR
P @ x l gSW
P * x r P

gIN x x l gIN ; inverse function
gIN _ _ l IN
IN 0 1 l IN
IN 1 0 l IN
* _ _ r RD ; return back to read another operator

gSL x x l gSL ; shift left function
gSL _ _ l SL0
SL0 0 0 l SL0
SL0 1 0 l SL1
SL1 0 1 l SL0
SL1 1 1 l SL1

gSR x x l gSR ; shift right function
gSR _ _ l gSR2
gSR2 * * l gSR2
gSR2 _ _ r SR0
SR0 0 0 r SR0
SR0 1 0 r SR1
SR1 0 1 r SR0
SR1 1 1 r SR1
SR0 _ _ r RD1
SR1 _ _ r RD1

gSW x x l gSW ; swap nybbles function
gSW _ _ l SW
SW 0 * l 40
SW 1 * l 41
40 * * l 30 ; read 4 bits ahead
30 * * l 20
20 * * l 10
10 0 0 r 4b0
10 1 0 r 4b1
41 * * l 31
31 * * l 21
21 * * l 11
11 0 1 r 4b0
11 1 1 r 4b1
4b0 * * r 3b0 ; and swap it
3b0 * * r 2b0
2b0 * * r 1b0
1b0 * 0 l SW
4b1 * * r 3b1
3b1 * * r 2b1
2b1 * * r 1b1
1b1 * 1 l SW

RD * * r RD  ; read next operator
RD _ _ r RD1
RD1 x x r RD1
RD1 * * * P
RD1 _ _ l CL ; if no more ops left, erase filler chars and convert to decimal.

CL x _ l CL ; clear filler chars
CL _ _ l D  ; if all filler chars are cleared, finally start converting
CL2 0 _ l CL2  ; start clearing all the 0s
CL2 _ _ * halt ; and halt when finished

D 0 1 l D   ; decrement the binary bits
D 1 0 l D2
D2 * * l D2 ; then increment the decimal number
D2 _ _ l I

I _ 1 r f ; increment the decimal number
I 0 1 r f
I 1 2 r f
I 2 3 r f
I 3 4 r f
I 4 5 r f
I 5 6 r f
I 6 7 r f
I 7 8 r f
I 8 9 r f
I 9 0 l I

f * * r f    ; then go back to the binary bits
f _ _ r A0   ; if reached one
A0 0 * r A0  ; check that the rest isn't all 0s
A0 1 * r f2  ; if not all 0s, go back to the binary bits and decrement
A0 _ _ l CL2 ; if is all 0s, start clearing for result display
f2 * * r f2  ; go back until LSB is reached
f2 _ _ l D   ; then decrement

Try this online!
It is recommended that you run at full speed, as the binary conversion is very very slow.

Japt -mh, 28 bytes

Port of l4m2's JS solution

T=#ÿ&[~TT/2TÑ57/GT]g"!<>@"bU

Try it

Vyxal 3, 37 bytes

0?ƛ"><!@"⟒∩"2∻2×N‹"²"H2Þ0⇄H"JyᏜ⑦%+]“ᴥ

Vyxal It Online!

-1 from changing how i implemented @

Left fork aware! (also implicit input cost me 2 bytes due to exec shenanigans)

All of the 33-byte versions fail on !>>>>@ because they don't properly swap the nibbles of 0F hex to get F0.

Swift 6, 99 95 bytes

let b={($0+"").reduce(UInt8()){a,i in"!"==i ? ~a:i==">" ?a/2:i=="<" ?a<<1:i=="@" ?a>>4|a<<4:a}}

Try it on SwiftFiddle!

AWK -

one can write fully POSIX-compliant awk code without using a single semi-colon (;)

 echo '
    ! -> 255
    !>> -> 63
    !<@ -> 239
    !nop!&6*! -> 255' |

gawk 'function _______(__,___,____,_____,_) {
  
  _ = "^[^!]+|@@|!!|[^!"(_____ = "<>@")"]+"
  while(gsub(_,___,__)) { }
  _ += ++_+_
  
  if (__ < "! ")
      return (_+++_^_-_)*!!__

  split(substr(__,--_),____,___)
  __ = ___ = _+_^_^++_-_
  
  for(_ in ____)___ = (_=index(_____,____[_]))<=!!_ ? (_ ?(___+___) % (_+__) \
      : __-___) : _+_==_*_ ? (___-___%_)/_ : ___<__ ? ++_*___*_%__ : ___
  return ___ 

} ($++NF = _______($!_))^_'

! -> 255 255
!>> -> 63 63
!<@ -> 239 239
!nop!&6*! -> 255 255

# gawk profile, created Tue Apr  2 07:15:18 2024

# Rule(s)

 4  ($++NF = _______($!_))^_ { # 4
 4      print
     }


# Functions, listed alphabetically

 4  function _______(__, ___, ____, _____, _)
    {
 4      _ = "^[^!]+|@@|!!|[^!" (_____ = "<>@") "]+"
 2      while (gsub(_, ___, __)) { }

 4      _ += ++_ + _
 4      if (__ < "! ")
 2          return (_+++_^_-_) * !!__

 2      split(substr(__, --_), ____, ___)
 2      __ = ___ = _+_^_^++_-_

 4      for (_ in ____)
 4          ___ = (_ = index(_____,____[_])) <= !!_? (_? (___+___) % (_+__) :\
            __-___) : _+_ == _*_ ? (___-___%_)/_ : ___<__ ? ++_*___*_%__ : ___

 2      return ___
     }

PowerShell Core, 98 bytes

switch($args){!{[byte]$a=-bnot$a-band255}`<{$a=$a-shl1}`>{$a=$a-shr1}`@{$a=($a-shl4)+($a-shr4)}}$a

Try it online!

Perl 5 -pF, 70 bytes

$\=(/\@/?($\<<4)+($\>>4):/!/?255-$\:/>/?$\>>1:/</?$\<<1:$\)%256for@F}{

Try it online!

Easyfuck, 106 bytes

Ì¢RGÏCCHçP®óëÊR>hñãåPLD¯[¼ùbë×ZÙ]nSŸ␝«¶òëêl%!}yn␅␏APC>|PU1ó␚jPM/|PU1ãÜÏ␔ç[HOPCçÏ¿fSOSCn␅␏APC}ÌùNu¸␔>÷SGCIóå*ï]CCHùóçÏSTSÀ␛á␋çPAD␀␀ÿ

due to lack of unicode representations for c1 control characters, they have been replaced by their superscripted abbreviations

Decompressed:

f(J$>>)g(+^>^)$>4<<>,.^[^>,.^]5Y.[J<-`;+[<]>S0J!>^-`(>>>>$>1S*</>$<=f<)g-`(>>>~f)g-`(>>}f>)g-`(>{f>>)+^]J>>>>>'@␍x!><␀␀

HP 41C/CV/CX (77 bytes, 42 steps)

Purely for giggles, here it is for the HP 41C/CV/CX calculator. (Requires either the Extended Functions module, or a 41CX for the ATOX function.)

Put your program into the Alpha register, which is a little tricky, as there's no way to enter ! or @ directly from the keyboard (use XTOA with the ASCII codes 33 and 64 respectively to append them).

Steps 08 and 10 allow for ignoring invalid opcodes; remove them to save 2 steps, but the program will crash on invalid input.

01 LBL"BB
02 0
03 LBL A
04 ATOX
05 X=0?
06 GTO E
07 X<>Y
08 SF 25
09 XEQ IND Y
10 CF 25
11 GTO A
12 LBL 33
13 255
14 X<>Y
15 -
16 RTN
17 LBL 60
18 2
19 *
20 256
21 MOD
22 RTN
23 LBL 62
24 2
25 /
26 INT
27 RTN
28 LBL 64
29 RCL X
30 16
31 /
32 INT
33 X<>Y
34 16
35 *
36 256
37 MOD
38 +
39 RTN
40 LBL E
41 RDN
42 RTN

GBZ80 machine code, 37 bytes

As soon as I saw this challenge, I knew I had to do this for the Game Boy where this maps 1:1.

000000a5: af 47 2a a7 c8 fe 21 28 00 fe 3c 28 00 fe 3e 28  .G*...!(..<(..>(
000000b5: 00 fe 40 20 ed cb 30 18 e9 78 2f 18 e4 cb 20 18  ..@ ..0..x/... .
000000c5: e1 cb 38 18 dd                                   ..8..

Commented assembly (rgbds with # comments instead of ;):

        SECTION ".text",ROM0
        # input: null terminated string in HL
        # output: accumulator final value in B
func::
        # a = 0
        xor     a, a
.with_ld:
        # Save A to B so we can load the next byte from the tape.
        ld      b, a
        # Since not all instructions require the A register, we can skip the
        # ld by jumping here instead.
.no_ld:
        # load byte from HL and increment
        # a = *hl++
        ld      a, [hli]
        # and A against itself to check for zero
        and     a, a
        # return if null terminator
        ret     z
        # just a normal compare and branch chain
        cp      a, "!"
        jr      z, .cpl
        cp      a, "<"
        jr      z, .left
        cp      a, ">"
        jr      z, .right
        cp      a, "@"
        # comment char, skip without ld
        jr      nz, .no_ld
        # b = (b >> 4) | (b << 4)
        swap    b
        jr      .no_ld
.cpl:
        # we need to use A for CPL
        ld      a, b
        # a = ~a
        cpl
        jr      .with_ld
.left:
        # b <<= 1
        sla     b
        jr      .no_ld
.right:
        # b >>= 1
        srl     b
        jr      .no_ld

Here is a version which prints directly using printf (SDCC ABI), satisfying the "must print" requirement (which I don't enjoy doing), 53 bytes

00 00 represents unlinked placeholders for the string below and _printf.

000000e4: af 4f 2a a7 28 00 fe 21 28 00 fe 3c 28 00 fe 3e  .O*.(..!(..<(..>
000000f4: 28 00 fe 40 20 ec cb 31 18 e8 79 2f 18 e3 cb 21  (..@ ..1..y/...!
00000104: 18 e0 cb 39 18 dc 47 c5 21 00 00 e5 cd 00 00 e8  ...9..G.!.......
00000114: 04 c9 25 75 00                                   ..%u.

The only differences are that it uses c instead of b and adds a call to printf.

        # .area _BASE
        SECTION ".text", ROM0
        # input: null terminated string in HL
        # output: decimal printed with printf
func::
        # a = 0
        xor     a, a
.with_ld:
        # save A to C
        ld      c, a
        # since not all instructions require the A register, we can skip the
        # ld by jumping here.
.no_ld:
        # a = *hli++
        ld      a, [hli]
        # and a, a is better than cp a, 0
        and     a, a
        # return if null terminator
        jr      z, .print
        # just a normal compare and branch chain
        cp      a, "!"
        jr      z, .cpl
        cp      a, "<"
        jr      z, .left
        cp      a, ">"
        jr      z, .right
        cp      a, "@"
        # comment char, skip without ld
        jr      nz, .no_ld
        # c = (c >> 4) | (c << 4)
        swap    c
        jr      .no_ld
.cpl:
        # we need to use A for CPL
        ld      a, c
        # a = ~a
        cpl
        jr      .with_ld
.left:
        # c <<= 1
        sla     c
        jr      .no_ld
.right:
        # c >>= 1
        srl     c
        jr      .no_ld
.print:
        # zero extend bc
        # a is known to be zero
        ld      b, a
        # SDCC uses cdecl
        push    bc
        # "%u"
        ld      hl, .str
        push    hl
        # printf("%u", (u16)result);
        call    _printf
        # restore stack and return
        add     sp, 4
        ret
.str:
        db "%u"
        db 0

Screenshot: screenshot

brainfuck, 316 bytes

->,[>++++[<-------->-]<-[>>]<[>>>>+[->-<]>[-<+>]<<<]++++[<------->-]<+[>>]<[>>>>[->++<]>[-<+>]<<<]<--[>>]<[>>>>[->+>+<<]>>[->>--[--<->]<-<]>[[-]<<->>]<<[--<+>]<<<]<--[>>]<[>>>>[->+>+<<]>>[->>++++[-<++++>]<<]>[->+>+<<]>>[>++++[<---->-]>+<<]>>[-<<<<<<->>>>>>]<<<<<<[>++++[<---->-]>+<<]>>>[-<+>]<[-<<<+>>>]<<<<<]<,]>>>.

Try it online!

Explanation:

->, input to cell 1
[
>++++[<-------->-]<- sub 33= now will be 0 if invert instruction (cell 1)
[>>] if not move to cell 3
<[    runs if !
>>>>+[->-<]>[-<+>]<<<  invert cell 4 to cell 5 back to 4 end 2
]
now ends on cell 2 either way

++++[<------->-]<+ sub 27
[>>]
<[ runs if shl
>>>>[->++<] shl cell 4 to cell 5
>[-<+>]< back to 4
<< to 2
]
ends on 2

<-- sub 2
[>>]
<[    runs if shr (hard)
>>>>[->+>+<<] copy 4 to cell 5 and 6
>>[->>--[--<->]<-<] get evenness of 6 to 7
>[ if odd
[-] zero cell 7
<<->> sub cell 5
]
ends cell 7
<<[--<+>]<<< shr cell 5 to 4
]
ends cell 2 again

<-- sub 2
[>>]
<[ if @ (also very hard)
>>>>[->+>+<<] copy 4 to 5 and 6 (end on 4)
>>[->>++++[-<++++>]<<] mul cell 6 by 16 to 7
>[->+>+<<] copy 7 to 8 and 9
>>[>++++[<---->-]>+<<] div 9 by 16 to 11= chopping off upper bits
>>[-<<<<<<->>>>>>]<<<<<< sub 5 by 11
[>++++[<---->-]>+<<] div 5 by 16 to 7
>>>[-<+>] add 8 to 7
<[-<<<+>>>]<<< move 7 to 4
<<
]
end on 2 again

<,] end on 1

a ends up at cell 4

>>>. print

Forth (gforth), 217

: o over ; : x 0 begin key dup emit swap o 13 = if . exit else o 33 = if invert else o 62 = if 1 rshift else o 60 = if 1 lshift else o 64 = if dup 4 lshift swap 4 rshift or then then then then then nip 255 and again ;

Not as short as I had hoped, but oh well. I'm sure this can be golfed further. There don't seem to be any alternatives to if/elseif :/

Formatted / ungolfed:

: x
  0 ( accumulator )
  begin
    key dup emit swap ( read char, print it and stash it away )
         over 13 = if . exit ( on newline, print accumulator and return. )
    else over 33 = if invert ( inversion )
    else over 62 = if 1 rshift ( shift 1 right )
    else over 60 = if 1 lshift ( shift 1 left )
    else over 64 = if dup 4 lshift swap 4 rshift or ( swap )
    then then then then then
    nip ( remove read character )
    255 and ( constrain accumulator to 1 byte after each step )
  again
;

JavaScript (ES6), 76 81

As an unnamed function returning the accumulator value

This is a port of the super clever answers by @daniero (which have way too few upvotes)

Bonus: you can pass an initial value for the accumulator. If not passed, the starting value is 0, as per the specification.

(p,a)=>(p.replace(/[!<>@]/g,i=>a=(i<'<'?~a:i<'>'?a*2:i<'@'?a/2:a*257/16)&255),a)

Test running the snippet below in any EcmaScript 6 browser (I tested in Firefox)

f=(p,a)=>[...p].map(c=>a=255&[a,~a,a*2,a/2,a*257/16][1+'!<>@'.indexOf(c)])|a

// TEST
out=x=>O.innerHTML+=x+'\n'

function go(x) { out(x+' -> '+f(x)) }

go('!'),go('!>>'),go('!<@'),go('!nop!&6*!')

// LESS GOLFED
F=(p,a)=>// a is a parameter; if not passed, its value starts as undefined, then becomes NaN, but the operators '&' and '~' treat it as 0
  [...p].map(c => // execute the following function for each character of p
    a = 255 & // any intermediate result is converted to a number and truncated to a byte
   // evaluate all possible results (then choose one based on the current character)
   [a,   // NOP: for an unexpected char, 'a' remains the same
    ~a,  // tilde == binary not (will give a result wider than a byte)
    a*2, // < shift left is *2 (can give a result wider than a byte)
    a/2, // > shift right is /2 (can give a non-integer result)
    a *257 / 16  // move nibbles around (will give a result wider than a byte)
   ] // array of all results
   [1+'!<>@'.indexOf(c)] // find index to get the correct result
  ) // end map, returns an array in any case
    // eventually a single element array containing a
  | a // return accumulator
Test program:<input id=I><button onclick='go(I.value)'>go</button>
<pre id=O></pre>

Python 2.7.3, 104 bytes

Having code in strings to be evaluated looks pretty dirty, but works :D

a=0
for c in raw_input():a=eval({'!':'~a','<':'a<<1','>':'a>>1','@':'a<<4|a>>4'}.get(c,'a'))&255
print a

Here's the output (and input actually..)

And yes, it's really running on a RaspberryPi :)

Example output

C#, 119 bytes

i=>{var a=0;foreach(var c in i){if(c=='!')a=~a;if(c=='>')a>>=1;if(c=='<')a<<=1;if(c=='@')a=a<<4|a>>4;a&=255;}return a;}

Other versions I tried, but need more bytes:

Func<string,int>C=i=>{var a=0;foreach(var c in i){switch(c){case'!':a=~a;break;case'<':a<<=1;break;case'>':a>>=1;break;case'@':a=a<<4|a>>4;break;}a&=255;}return a;};

// This is, despite having the worst score, my personal favourite :D
Func<string,int>D=i=>{var f=new Dictionary<char,Func<int,int>>{{'!',q=>~q},{'<',q=>q<<1},{'>',q=>q>>1},{'@',q=>q<<4|q>>4}};var a=0;foreach(var c in i)if(f.ContainsKey(c))a=f[c](a)&255;return a;};

Perl 6, 96 89 bytes

{my $a=0;$a=(+^*,*+<1,*+>1,{$_+<4+$_+>4},{$_})["!<>@".index($_)//4]($a)%256 for .comb;$a}

Old solution:

{my $a=0;$a=(255-*,*+<1+&255,*+>1,{$_+&15+<4+$_+>4},{$_})["!<>@".index($_)//4]($a)for .comb;$a}

Julia, 117 94 86 73 bytes

p->(a=0x0;[a=c==33?~a:c==60?a<<1:c==62?a>>1:c!=64?a:a<<4|a>>4for c=p];1a)

This is an anonymous function that accepts a string and returns an integer. To call it, assign it to a variable.

Ungolfed:

"""
    f(p)

Run the program string `p` on an 8-bit accumulator that starts at 0 and
return the final accumulator value as an `Int`.

Instructions: `!` inverts all bits, `<` shifts left by one, `>` shifts right
by one, `@` swaps the two nibbles. Any other character is ignored.
"""
function f(p)
    # Initialize the accumulator to 0 as an 8-bit unsigned integer, so every
    # operation below wraps to a single byte without explicit masking
    a = 0x00

    # Loop over the characters in the input. Compare against character
    # literals: a Char never compares equal to an integer code on current
    # Julia versions, which would turn every instruction into a no-op.
    for c in p
        a = c == '!' ? ~a :                     # invert
            c == '<' ? a << 1 :                 # shift left
            c == '>' ? a >> 1 :                 # shift right
            c == '@' ? (a << 4) | (a >> 4) :    # swap nibbles
            a                                   # no-op
    end

    # Convert the accumulator to a regular integer and return
    return Int(a)
end

Saved 8 bytes thanks to Sp3000 and 13 thanks to Dennis!

Java (8), 514 483 411 366 359 239 224 229 198 194 187 186 184 182 181 180 177 characters

Wow, this has been golfed down a LOT! Thanks to everyone who gave me suggestions! I greatly appreciate it!

interface T{static void main(String[]g)throws Exception{int i,a=0;while((i=System.in.read())!=10)a=(i==33?255-a:i==62?a/2:i==60?a*2:i==64?a>>4|a<<4:a)%256;System.out.print(a);}}

Golfed 31 (!) bytes by optimizing the nibble swap with bitwise operations as opposed to lengthy Integer.??? methods.

Golfed 72 (!!!!) chars by removing the unnecessary string created to swap nibbles. Much better than before!?

Golfed 45 (!!) chars by removing use of java.util.Scanner and reading from System.in directly. Note that now that the lambda expression is gone, Java 8 is no longer required! Just merely Java 1 would do!

Golfed 7 chars by making class (default) (removed public keyword), thanks to @bmarks

Golfed 120 (!!!!!!!) chars by turning all those lengthy Integer class operations in the bit flipping to 255 - a. Now that's much shorter!

Golfed 15 (!) chars by converting shifts to multiplication and division, removing the braces from the while statement, and making a local within the main method.

Ungolfed 9 =( chars because of a problem with the left shift not discarding the leftmost byte. Therefore, I now do mod (256). The right shift will make the resulting number one bit shorter than before, so there is no need to use mod on right shift. My nibble-swap thing will swap the last 4 bits and the second-last nibble, and the and (&) truncates all other bits. My inversion program doesn't cause any problems if the original number is less than 256.

Golfed 31 35 chars thanks to @Geobits by converting switch statement to a lot of ternary statements, and also converting chars to ints, shortening the literals.

Golfed 7 chars by removing unnecessary &240 in the nibble swap ((a&240)>>4 to a>>4 and converting (a&15)<<4 to a<<4&240. The last change only golfed one character though.

Golfed 1 char by removing unnecessary = in a /= 2, because a = a /= 2 is equivalent to a = a / 2.

Golfed 2 chars by turning println to print.

Golfed 2 chars by removing accidental a= in a=255-a (a=a=255-a is equivalent to a=255-a)

Golfed 1 char by turning a<<4&240 into a%16<<4.

Golfed 1 char by adding brackets to the outside of the ternary statement and doing %256. That way, the %16 is unnecessary in the left-shift part of the nibble swap. The brackets add 2 chars and the %16 saves 3 chars.

Golfed 3 chars by changing class to interface and removing public using Java 8's static interface method feature. Thanks to @TheNumberOne (no comment, but find his answer on "Tips for golfing in Java").

Python 3, 133 bytes

Uses a dictionary to make up for a lack of switch-case syntax in Python. See more here.

a="0"*8
for i in input():a={"!":''.join(str(1-int(b))for b in a),"<":a[1:]+"0",">":"0"+a[:-1],"@":a[4:]+a[:4]}.get(i,a)
print(int(a,2))

The accumulator is a string which is converted into a base 10 number at the end.

Example I/O:

$ python3 bitsnbytes.py
!
255
$ python3 bitsnbytes.py
!>>
63
$ python3 bitsnbytes.py
!<@
239
$ python3 bitsnbytes.py
!nop!&6*!
255

HPPPL, 302 294 bytes

#pragma mode(separator(.,;)integer(d8))EXPORT b()BEGIN print();local p,j,a;a:=#0d;INPUT({{p,[2]}});for j from 1 to dim(p)do c:=p(j);case if c==33 then a:=BITNOT(a)end if c==62 then a:=BITSR(a,1)end if c==60 then a:=BITSL(a,1)end if c==64 then a:=BITSL(a,4)+BITSR(a,4)end end;end;print(a*1);END;

Ungolfed:

// make sure integers are unsigned 8 bit decimal numbers
#pragma mode( separator(.,;) integer(d8) ) 
EXPORT b()
BEGIN
  print();
  local p,j,a;
  a:=#0d;                         // set a to integer value 0
  INPUT({{p,[2]}});               // open input dialog treating input as string ( type [2])
  for j from 1 to dim(p) do
    c:=p(j);
    case
      if c==33 then a:=BITNOT(a) end             // !
      if c==62 then a:=BITSR(a,1) end            // >
      if c==60 then a:=BITSL(a,1) end            // <
      if c==64 then a:=BITSL(a,4)+BITSR(a,4) end // @
    end;
  end;
  print(a*1); // converts to proper output by promoting to non integer format
              // print(a) would result in
              // #239:8d for 239 if the default bit size is not set to 8 bits decimal
              // indicating an 8 bit unsigned decimal integer, or
              // #239d if the default bit size is already set to 8 bits decimal

END;

HPPPL Input command

HPPPL Output to Terminal

This answer ensures that the HP Prime uses unsigned 8 bit integers even if the mode is set to e.g. 64 bits by the user. If the calculator is set up manually to using unsigned 8 bit decimal integers, then the pragma command can be omitted. If the output does not need to follow the format strictly then the a*1 at the end can simply be a. Multiplying the result by 1 just ensures the output does not follow the internal output for integer values. The print command in line 4 can also be omitted if the terminal does not need to be cleared before printing out the result. If passing the program as a string argument is allowed, then the INPUT command can be omitted as well.

This is the shortest version with input and proper output, without the pragma argument (if the calculator is set to Uint8 by default):

243 bytes:

EXPORT b()BEGIN local p,j,a;a:=#0d;INPUT({{p,[2]}});for j from 1 to dim(p)do c:=p(j);case if c=33 then a:=BITNOT(a)end if c=62 then a:=BITSR(a,1)end if c=60 then a:=BITSL(a,1)end if c=64 then a:=BITSL(a,4)+BITSR(a,4)end end;end;print(a*1);END;

Python 2, 79 bytes

I realized that I have done something very similar to this in Python earlier. This is just a port of my Ruby answer, but it is incidentally the shortest Python answer as of now :D

a=0
for i in raw_input():a=[~a,a/2,a*2,a*16+a/16,a]["!><@".find(i)]&255
print a

The difference from the Ruby version is that this one doesn't ignore invalid instructions while iterating over the input. Instead I take advantage of the fact that Python tends to return -1 instead of nil when there is no match -- the current value of a is appended to the back of the result array, so that all invalid instructions map to the same, unchanged value.

PHP, 189 bytes

<? $c='00000000';foreach(str_split($argv[1])as$a){$a=='!'&&$c=strtr($c,'01','10');$a=='<'&&$c=substr($c.'0',1);$a=='>'&&$c=substr('0'.$c,0,8);$a=='@'&&$c=substr($c.$c,4,8);}echo bindec($c);

It's not going to beat many answers; it's only for practice.

K, 57 bytes

It's a start:

0{y+2*x}/(8#0){((~:;{-1_0,x};{1_ x,0};4!;{x})"!><@"?y)x}/

tested using Kona:

  f:0{y+2*x}/(8#0){((~:;{-1_0,x};{1_ x,0};4!;{x})"!><@"?y)x}/
...
  f'("!";"!>>";"!<@";"!nop!&6*!")
255 63 239 255

I might be able to do better in k5, but it's a complex series of tradeoffs- for example, converting binary to decimal is as easy as 2/, but the behavior of ? makes it harder to handle a default case for instruction lookup.

C, 96

Assuming ASCII (or compatible) input:

a;main(c){while(c=getchar()+1)a=(c^34?c^61?c^63?c^65?a:a*257/16:a/2:a*2:~a)&255;printf("%u",a);}

Tidier:

/* Accumulator (implicit int at file scope). */
a;
/* Reads the program from standard input one character at a time, applies
   each instruction to the accumulator, and prints the final value. */
main(c){
  /* getchar()+1 turns EOF (-1) into 0, which ends the loop. Because of the
     +1, every comparison below is against the ASCII code plus one. */
  while(c=getchar()+1)
    /* Nested ternaries: each c^K is non-zero when c differs from K, so the
       "true" branch means "not this instruction, keep testing". */
    a=(c^34?            /* not '!' (33+1) */
      c^61?             /* not '<' (60+1) */
        c^63?           /* not '>' (62+1) */
          c^65?         /* not '@' (64+1) */
            a           /* any other character: leave unchanged */
          :
            a*257/16    /* '@': swap nibbles (masked to a byte below) */
        :
          a/2           /* '>': shift right */
      :a*2:~a           /* '<': shift left; '!': invert */
    )&255;              /* keep only the low 8 bits */
  printf("%u",a);
}

Basically it's just a collection of nested ternary expressions. I'm incrementing the value obtained from getchar() so that an EOF (-1) results in a value of zero and the program exits.

(ideone link)

RPL, 170.5 bytes

The input should be entered as a string on level 1.

\<< DEC 8 STWS \-> S 
    \<< #0d 1 S SIZE 
        FOR I "!><@" S I DUP SUB POS 1 + { \<< \>> NOT SR SL \<< DUP #16d / SWAP #16d * + \>> } SWAP GET EVAL NEXT \>> 
\>>

R, 194 bytes

b<-readline();A<-rep(0,8);s<-strsplit(b,"")[[1]];for(r in s){if(r=="!")A<-(A+1)%%2;if(r==">")A<-c(0,A)[1:length(A)];if(r=="<")A<-c(A,0)[-1];if(r=="@")A<-c(A[5:8],A[1:4])};print(sum(A*(2^(7:0))))

ungolfed

b <- readline()
A <- rep(0, 8) 
s <- strsplit(b, "")[[1]]
for (r in s) {
    if (r == "!")
        A <- (A + 1) %% 2
    if (r == ">")
        A <- c(0, A)[1:length(A)]
    if (r == "<")
        A <- c(A, 0)[-1]
    if (r == "@")
        A <- c(A[5:8], A[1:4])
}
print(sum(A*(2^(7:0))))

JavaScript, 104

[].reduce.call(prompt(),function(a,i){return(i=='!'?~a:i=='>'?a/2:i=='<'?a*2:i=='@'?a>>4|a<<4:a)&255},0)

Nested ternary operators map to instructions.

BITWISE AND is used to constrain our Number type to a single byte.

Ruby, 81 73 bytes

So much simpler -- no eval! For each valid character in the input, it evaluates each instruction, and finds the appropriate instruction through the index of $& (the current character in the input).

a=0
gets.scan(/[!><@]/){a=[~a,a/2,a*2,a*16+a/16]["!><@".index$&]&255}
p a

Rust, 121 115 bytes

fn r(s:&str)->u8{let mut n=0u8;for t in s.chars(){match t{'!'=>n=!n,'>'=>n/=2,'<'=>n<<=1,'@'=>n=n>>4|n<<4,_=>()}}n}

Sample run:

fn main() {
    println!("{}", r("!"));    //=> 255
    println!("{}", r("!>>"));  //=> 63
    println!("{}", r("!<@"));  //=> 239
}

Ungolfed:

/// Runs the program `s` on an 8-bit accumulator that starts at 0 and
/// returns the final accumulator value.
///
/// `!` inverts the bits, `>` shifts right by one, `<` shifts left by one,
/// `@` swaps the two nibbles; every other character is ignored.
fn run_ungolfed(s: &str) -> u8 {
    s.chars().fold(0u8, |acc, op| match op {
        '!' => !acc,
        '>' => acc >> 1,
        '<' => acc << 1,
        // High nibble moves down, low nibble moves up.
        '@' => (acc & 15) << 4 | acc >> 4,
        // Not an instruction: carry the accumulator through unchanged.
        _ => acc,
    })
}

Surprisingly short for Rust. Nothing else really interesting other than the fact that I learned more precedence rules today—who knew (a>>b)|c is the same as a>>b|c?

Shaved off a byte by changing n>>=1 to n/=2; however, the same cannot be done with multiplication, because arithmetic overflow is a panic (i.e. crash) in Rust.

Javascript (ES6), 80 91 90 bytes

a=>[...a].reduce((p,c)=>c=='!'?p^255:c=='<'?p*2%256:c=='>'?p>>1:c=='@'?p/16|0+p%16*16:p,0)

Pretty much as short as it can get. Defines an anonymous function which takes the program as input.

Thanks to @vihan for suggesting the use of reduce to save a byte.

Lua, 344 char

a=string.rep("0",8)
t=io.read()
f={["!"]=function()local s="";for j=1,8 do s=s..(a:sub(j,j)=="0"and"1"or"0") end;return s end,[">"]=function() return "0"..a:sub(1,7) end,["<"]=function()return a:sub(2,8).."0"end,["@"]=function()return a:sub(5,8)..a:sub(1,4)end}
for i=1,#t do a=(f[t:sub(i,i)]or function()return a end)()end
print(tonumber(a,2))

Inspired by @Beta Decay's use of a string accumulator, seeing as lua has no byte type. Could probably be golfed more by using fewer functions.

Ceylon, 297 290

shared void y(){value t=process.readLine()else"";variable Byte a=0.byte;for(c in t){switch(c)case('!'){a=a.not;}case('>'){a=a.rightLogicalShift(1);}case('<'){a=a.leftLogicalShift(1);}case('@'){a=a.and(#f0.byte).rightLogicalShift(4).xor(a.and(#f.byte).leftLogicalShift(4));}else{}}print(a);}

Formatted:

shared void y() {
    value t = process.readLine() else "";
    variable Byte a = 0.byte;
    for (c in t) { switch (c)
        case ('!') { a = a.not; }
        case ('>') { a = a.rightLogicalShift(1); }
        case ('<') { a = a.leftLogicalShift(1); }
        case ('@') { a = a.and(#f0.byte).rightLogicalShift(4).xor(a.and(#f.byte).leftLogicalShift(4)); }
        else {} }
    print(a);
}

#f and #f0 are hexadecimal numbers for the nibbles, .byte converts an integer into a byte. I'm lucky that Byte's .string attribute already uses the unsigned representation of a byte. Ceylon also features a switch statement without fall-through, and a string is a list of characters, which can be iterated.

I also tried to cut those long shift method names down by using an aliasing import, but this actually becomes 7 bytes longer:

import ceylon.language{Byte{r=rightLogicalShift,l=leftLogicalShift}}shared void x(){value t=process.readLine()else"";variable Byte a=0.byte;for(c in t){switch(c)case('!'){a=a.not;}case('>'){a=a.r(1);}case('<'){a=a.l(1);}case('@'){a=a.and(#f0.byte).r(4).xor(a.and(#f.byte).l(4));}else{}}print(a);}

Formatted:

import ceylon.language {
    Byte {
        r=rightLogicalShift,
        l=leftLogicalShift
    }
}
shared void x() {
    value t = process.readLine() else "";
    variable Byte a = 0.byte;
    for (c in t) {
        switch (c)
        case ('!') { a = a.not; }
        case ('>') { a = a.r(1); }
        case ('<') { a = a.l(1); }
        case ('@') { a = a.and(#f0.byte).r(4).xor(a.and(#f.byte).l(4)); }
        else {}
    }
    print(a);
}

This might be useful if we need those methods a bit more often.

Python 3, 124 94 93 bytes

a=0
for i in input():
 if i in"!><@":a=(i=='!')*(255-a)+(i==">")*a//2+(i=="<")*(a+a)%256+(i=="@")*(16*(a%16)+a//16)
print(a)

"!" is same as subtracting from 255.
"<" is same as multiplying by 2. But 8 bit register means mod 256.
">" is same as integer division by 2.
"@" means shifting last 4 bits (a%16) by 4 bits(*16) and adding the first four bits(a/16).

EDIT (read shameless copying)
Saw the other answer in python (by Beta decay). It uses a really effective way to simulate switch cases using dictionary. Using that we can write

# 8-bit accumulator, starts at 0.
a=0
# For each input character, build a table of every instruction's result and
# pick the matching one; unknown characters fall back to the current value.
# The nibble swap is parenthesized explicitly: '+' binds tighter than the
# shift operators, so "(a%16)<<4+a>>4" would shift by 4+a instead.
for i in input():a={"!":255-a,"<":a<<1&255,">":a//2,"@":(a%16<<4)+(a>>4)}.get(i,a)
print(a)

Thanks, Beta Decay.

Python 3, 127 bytes

Edit: shorting, thanks @Jakube

Edit2: fix, thanks @Anachor

a=0
for i in input():a=(a^255if i=="!"else a>>1if i==">"else a<<1if i=="<"else(a&15)<<4|(a&240)>>4if i=="@"else a)&255
print(a)

Rust, 111 bytes

More of a comment on @Doorknob's answer, but I don't have any rep for comments as I just created an account.

One can shave 10 bytes off his Rust solution with the following:

fn r(s:&str)->u8{let mut n=0u8;for t in s.chars(){n=match t{'!'=>!n,'>'=>n>>1,'<'=>n<<1,'@'=>n>>4|n<<4,_=>n}}n}

Pyth, 36 35 bytes

u%@[t_G/G2yGi_jGJ16JG)x"!><@"H256z0

Test harness

The internal representation of the accumulator is an integer. This integer is mod-ed by 256 on each iteration, as desired. The operations performed are -G-1, G/2, G*2 and G converted to base 16, reversed, and converted back to base 10, where G is the accumulator.

I missed the line about ignoring everything else. This has been remedied. Thanks, @Dennis.

CJam, 37 bytes

0q{"!><@"#"~ 2/ 2* GmdG*+ "S/=~255&}/

Try it online in the CJam interpreter.

How it works

0                   e# Push 0 (accumulator).
q                   e# Read from STDIN.
{                   e# For each character in the input:
  "!><@"#           e#   Find its index in "!><@" (-1 if not found).
  "~ 2/ 2* GmdG*+ " e#   Push that string.
  S/                e#   Split at spaces to push ["~" "2/" "2*" "GmdG*+" ""].
                    e#     "~"      : signed 64-bit bitwise NOT
                    e#     "2/"     : divide by 2
                    e#     "2*"     : multiply by 2
                    e#     "GmdG*+" : (x) -> (x/16) (x%16) -> (16(x%16) + (x/16))
                    e#     ""       : NOOP
  =~                e#  Select the corresponding string and evaluate it.
  255&              e#  Zero all but the 8 least significant bits.
}/                  e#

Haskell, 89 bytes

a#'!'=255-a
a#'>'=div a 2
a#'<'=mod(a*2)256
a#'@'=mod(a*16)256+div a 16
a#_=a
f=foldl(#)0

Usage example: f "!>>" -> 63

C# 193

void Main(){byte a=0;foreach(var c in Console.ReadLine()){if(c=='!')a=(byte)~a;if(c=='>')a=(byte)(a>>1);if(c=='<')a=(byte)(a<<1);if(c=='@')a=(byte)(((a&240)>>4)|((a&15)<<4));}Console.Write(a);}

STATA, 197 bytes

di _r(a)
gl b=0
forv x=1/`=length("$a")'{
gl c=substr("$a",`x',1)
if"$c"=="!" gl b=255-$b
if"$c"==">" gl b=int($b/2)
if"$c"=="<" gl b=mod($b*2,256)
if"$c"=="@" gl b=mod($b,16)*16+int($b/16)
}
di $b

Ungolfed

display _request(a) //get the input via prompt and put in var a
global b=0 //initialise A to be 0
forv x=1/`=length("$a")'{ //for loop from 1 to last char in a
global c=substr("$a",`x',1) //get the char at index x in a
if "$c"=="!" global b=255-$b //invert is the same as 255-A
if "$c"==">" global b=int($b/2) //right shift is the same as A/2 (with integer division)
if "$c"=="<" global b=mod($b*2,256) //left shift is the same as A*2%256
if "$c"=="@" global b=mod($b,16)*16+int($b/16) //nibble swap is the same as A%16*16+A/16
}
display $b //display the result of A

Does not work with the online interpreter and requires the non-free default interpreter. This would be somewhat easier with actual bitwise operations, but I don't think they're too useful for most of the common uses of STATA.

Crystal, 139 bytes

# Runs the program string x on an 8-bit accumulator starting at 0 and prints
# the final value. '!' inverts, '>' shifts right, '<' shifts left, '@' swaps
# the nibbles; any other character leaves the accumulator unchanged.
def f x
b=0_u8
x.chars.each do|c|
b=case c
when'!'
~b
when'>'
b>>1
when'<'
b<<1
when'@'
b<<4|b>>4
# Non-instruction characters are ignored rather than raising, so programs
# such as "!nop!&6*!" run to completion.
else b
end
end
puts b
end