g | x | w | all
Bytes | Lang | Time | Link
839 | C (clang) -lm | 170105T145251Z | G B
421 | J | 170107T115946Z | miles
519 | Python 2 | 160605T072027Z | Sp3000
633 | Python 2 | 160603T164213Z | user4594

C (clang) -lm, 1913 ... 839 bytes

/*
 * Golfed SHA-256. Nothing is #included: bcopy, bzero, htonl, sqrt and cbrt are
 * called without declarations.
 *
 * Functions defined on the last line of this block:
 *   A(x)      compresses the 64-byte buffer x->d into the state x->s
 *   i(x)      zeroes the first 88 bytes of *x, then sets x->s[0..7] from sqrt
 *   p(x,w,l)  appends l bytes from w to x->d, calling A whenever 64 are buffered
 *   f(x,h)    appends 0x80, zero padding and the bit count, calls A, then
 *             stores htonl(x->s[o]) into h[o] for o = 0..7
 */
/* "q" expands to "unsigned l", so every use of q both names the type and
   declares a variable called l: the struct field, the file-scope l, the length
   parameter of p, and the local counter inside V. */
#define q unsigned l
/* "I field" expands to "x-> field". */
#define I x->
/* Adds the comparison (b[0] > 1<<(32-c)) to b[1], then adds c to b[0].
   NOTE(review): the shift count 32-c is negative whenever c > 32 (f passes
   l*8) - confirm this is the intended carry test. */
#define D(c)I b[1]+=*I b>1<<32-c,*I b+=c
/* Rotate a right by b bits (assumes a is a 32-bit unsigned value). */
#define R(a,b)(a>>b|a<<32-b)
/* Deliberately left open: the call site supplies the shift count and the
   closing parenthesis, e.g. S(t,11,25)0) or S(m[o-2],17,19)10). */
#define S(x,a,b)(R(x,a)^R(x,b)^x>>
/* Starts with ';' so it also terminates the preceding statement, then loops
   the file-scope counter o from 0 to b-1. */
#define G(b);for(o=0;o<b;o++)
/* Trial division: counts primes with a block-local l until l > o, leaving the
   last prime found in r, then evaluates "b (1L<<32)*a(r);" where a is sqrt or
   cbrt and b is an assignment prefix supplied by the caller. */
#define V(a,b){q=0;for(n=2;k=l<=o;k/n?r=n,l++:0,n++)while(n%++k);b(1L<<32)*a(r);}
/* Open call "bzero(x-> d"; the call site supplies the offset, length and ')'. */
#define B bzero(I d
/* Context X: d = byte buffer, l = bytes buffered, b[2] = bit counter, s[8] = state.
   File-scope scratch: l,k,n,r,o plus a[8] (working variables), t, m[64] (schedule).
   NOTE(review): the loops in A run on the same file-scope o that p's G(l) loop
   iterates, and p adds 32 per buffered block via D(32) while f adds l*8 -
   confirm behaviour for inputs of 64 bytes or more. */
typedef struct{char d[64];q,b[2],s[8];}X;q,k,n,r,o,a[8],t,m[64];A(X*x){bcopy(I s,a,32)G(64){t=a[4];t=a[7]+(R(t,6)^t^S(t,11,25)0))+(t&a[5]^~t&a[6])+(m[o]=o<16?htonl(o[(int*)I d]):S(m[o-2],17,19)10)+m[o-7]+S(m[o-15],7,18)3)+m[o-16]),l=R(*a,2)^*a^S(*a,13,22)0),l+=*a&a[1]^(*a^a[1])&a[2],bcopy(a,a+1,28);V(cbrt,a[4]+=t+=)*a=t+l;}G(8)I s[o]+=a[o];}i(X*x){B,88)G(8)V(sqrt,I s[o]=)}p(X*x,char*w,q){G(l)I d[I l]=w[o],++I l>63?A(x),D(32),I l=0:0;}f(X*x,*h){I d[o=I l]=128;B+ ++o,56+o/57*8-o);o>56?A(x),B,56):0;D(I l*8)G(8)I d[63-o]=I b[o/4]>>o%4*8;A(x)G(8)o[h]=htonl(I s[o]);}

Try it online!

Thanks to ceilingcat for a lot of improvements; I didn't expect to get under 1 KB.

Old version - before ceilingcat's improvements

C, 1913 1822 bytes (just for fun)

/*
 * SHA-256: readable, corrected form of the golfed program.
 *
 * Public names are unchanged:
 *   X                  hashing context
 *   k, v               round constants and initial state
 *   i(&ctx)            initialise a context
 *   p(&ctx, data, n)   absorb n bytes of data
 *   f(&ctx, out)       pad, finish and write the 32-byte digest to out
 *   a(&ctx)            compress the 64-byte block held in ctx.d (internal step)
 *
 * Fixes relative to the golfed text:
 *   - The bit-length carry test was written "a>1<<33-1-c", which parses as
 *     a > 1<<(32-c). For c = 512 that is a shift by a negative count
 *     (undefined behaviour) and produces a bogus carry into b[1]. It is now
 *     an ordinary 32-bit overflow check.
 *   - The big-endian word load used four unsequenced z++ in one expression
 *     and shifted a promoted (signed) byte left by 24; both are now defined.
 *   - Functions have explicit return types; the unused local in p() is gone.
 */

_Static_assert(sizeof(unsigned) == 4, "this code assumes a 32-bit unsigned int");

/* Hashing context. Field order matches the original struct. */
typedef struct {
    unsigned char d[64]; /* bytes waiting to be compressed */
    unsigned l;          /* number of valid bytes in d */
    unsigned b[2];       /* message length in bits: b[0] low word, b[1] high word */
    unsigned s[8];       /* chaining state */
} X;

/* Round constants. */
unsigned k[] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

/* Initial chaining state. */
unsigned v[] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};

/* Rotate a 32-bit word right by n bits (0 < n < 32). */
static unsigned sha_rotr(unsigned x, unsigned n)
{
    return x >> n | x << (32 - n);
}

/* Add n to the 64-bit bit counter kept in x->b, carrying into the high word. */
static void sha_add_bits(X *x, unsigned n)
{
    if (x->b[0] > 0xffffffffu - n)
        ++x->b[1];
    x->b[0] += n;
}

/* Compress the 64-byte block in x->d into the state x->s. Always returns 0. */
int a(X *x)
{
    unsigned w[64]; /* message schedule */
    unsigned r[8];  /* working variables a..h */
    unsigned t;

    /* First 16 schedule words are the block read as big-endian 32-bit words. */
    for (t = 0; t < 16; ++t) {
        const unsigned char *src = x->d + 4 * t;
        w[t] = (unsigned)src[0] << 24 | (unsigned)src[1] << 16
             | (unsigned)src[2] << 8 | (unsigned)src[3];
    }
    for (t = 16; t < 64; ++t) {
        unsigned near = w[t - 2];
        unsigned far = w[t - 15];
        w[t] = (sha_rotr(near, 17) ^ sha_rotr(near, 19) ^ near >> 10) + w[t - 7]
             + (sha_rotr(far, 7) ^ sha_rotr(far, 18) ^ far >> 3) + w[t - 16];
    }

    for (t = 0; t < 8; ++t)
        r[t] = x->s[t];

    for (t = 0; t < 64; ++t) {
        unsigned e = r[4];
        unsigned top = r[0];
        unsigned t1 = r[7] + (sha_rotr(e, 6) ^ sha_rotr(e, 11) ^ sha_rotr(e, 25))
                    + ((e & r[5]) ^ (~e & r[6])) + k[t] + w[t];
        unsigned t2 = (sha_rotr(top, 2) ^ sha_rotr(top, 13) ^ sha_rotr(top, 22))
                    + ((top & r[1]) ^ (top & r[2]) ^ (r[1] & r[2]));

        r[7] = r[6];
        r[6] = r[5];
        r[5] = r[4];
        r[4] = r[3] + t1;
        r[3] = r[2];
        r[2] = r[1];
        r[1] = r[0];
        r[0] = t1 + t2;
    }

    for (t = 0; t < 8; ++t)
        x->s[t] += r[t];
    return 0;
}

/* Reset the context: empty buffer, zero bit count, initial state. Returns 0. */
int i(X *x)
{
    unsigned n;

    x->l = 0;
    x->b[0] = 0;
    x->b[1] = 0;
    for (n = 0; n < 8; ++n)
        x->s[n] = v[n];
    return 0;
}

/* Absorb l bytes from w, compressing each time 64 bytes are buffered. Returns 0. */
int p(X *x, const char *w, unsigned l)
{
    unsigned n;

    for (n = 0; n < l; ++n) {
        x->d[x->l] = (unsigned char)w[n];
        if (++x->l == 64) {
            a(x);
            sha_add_bits(x, 512);
            x->l = 0;
        }
    }
    return 0;
}

/*
 * Finish the hash: append 0x80, zero padding and the 64-bit big-endian bit
 * length, compress, and write the 32-byte big-endian digest to h.
 * The context is not reset afterwards. Returns 0.
 */
int f(X *x, char *h)
{
    unsigned n = x->l;
    unsigned word, byte;

    x->d[n++] = 0x80;
    if (n > 56) {
        /* No room left for the 8-byte length: flush this block first. */
        while (n < 64)
            x->d[n++] = 0;
        a(x);
        n = 0;
    }
    while (n < 56)
        x->d[n++] = 0;

    /* x->l still holds the count of buffered message bytes. */
    sha_add_bits(x, x->l * 8);
    for (n = 0; n < 8; ++n)
        x->d[63 - n] = (unsigned char)(x->b[n / 4] >> 8 * (n % 4));
    a(x);

    for (word = 0; word < 8; ++word)
        for (byte = 0; byte < 4; ++byte)
            h[word * 4 + byte] = (char)(x->s[word] >> (24 - 8 * byte));
    return 0;
}

I took the reference implementation and started golfing; my target was to get under 2k bytes.

This could be improved if somebody knows a golf-friendly way to generate the constants (the cube roots of primes); I can't think of one.

Usage:

// Example call sequence; `text` is a NUL-terminated string supplied by the caller.
X ctx;
unsigned char hash[32];     // receives the 32-byte digest

i(&ctx);                    // initialize context
p(&ctx,text,strlen(text));  // hash string
f(&ctx,hash);               // get hash

J, 458 445 443 438 435 430 421 bytes

NB. Monadic verb: y is a list of bits (the message); the result is a list of bits.
NB. B - thirty-two 2s, the radix list for a 32-bit word.
NB. A - integer -> 32 bits (B&#:), which also reduces modulo 2^32.
NB. P - add two bit lists as integers (+&#.); callers re-wrap the sum with A.
NB. H, K - rows of a table built from the square roots (H) and cube roots (K)
NB.        of the first 64 primes: fractional part, doubled 32 times, floored,
NB.        then converted to bits with A.
NB. The outer loop pads y with a 1 bit, zeros up to 448 mod 512 and the 64-bit
NB. length, then visits each 512-bit block m. w is the 16-word block extended
NB. by 48 further words; the inner loop runs 64 rounds over a..h using K and w.
NB. Only the first 8 rows of H are kept as the running state.
3 :0
B=.32#2
A=.B&#:
P=.+&#.
'H K'=.A<.32*&2(-<.)2 3%:/p:i.64
for_m._512]\(,(1{.~512|448-#),(,~B)#:#)y
do.w=.(,B#:(15&|.~:13&|.~:_10|.!.0])@(_2&{)P/@,(25&|.~:14&|.~:_3|.!.0])@(_15&{),_7 _16&{)^:48]_32]\m
'a b c d e f g h'=.H=.8{.H
for_t.i.64
do.u=.A]P/((e<g)~:e*f),h,(~:/26 21 7|."{e),t{&>K;w
v=.A(a*b)P(c*a~:b)P~:/30 19 10|."{a
h=.g
g=.f
f=.e
e=.A]d P u
d=.c
c=.b
b=.a
a=.A]u P v
end.
H=.A]H P a,b,c,d,e,f,g,:h
end.
,H
)

Try it online!

This is a monadic verb that takes a list of bits as input and outputs a list of bits. On TIO, conversion from string to list of bits for the input and list of bits to hexadecimal is implemented for convenience. To test other input, just modify the text in the input field.

Python 2, 519 bytes

# Python 2 script: reads a quoted string of '0'/'1' characters via input() and
# prints eight words formatted with %08x.
Q=2**32
# G(e): for each x in 2..311 that passes the test 383**(x-1)%x<2 (presumably
# meant to select the primes), the fractional part of x**e scaled by 2**32.
G=lambda e:[int(x**e%1*Q)for x in range(2,312)if 383**~-x%x<2]
# Initial state: the first eight values for exponent 1/2.
H=G(.5)[:8]
# Rotate right by b; the result is NOT masked to 32 bits, callers reduce with %Q.
r=lambda v,b:v>>b|v<<32-b
M=input()
l=len(M)
# Padding in one step: bin() of (1 << ((447-l)%512+64)) | l yields a '1', the
# zero fill, and the length l in the low 64 bits.
M+=bin(l|1<<(447-l)%512+64)[2:]
# One iteration per 512-bit block. The three exec strings are repeated 16, 48
# and 64 times: append the 16 block words to H, append 48 schedule words, then
# run 64 rounds (H[j+8] is schedule word j because H[0:8] is the state).
# The final zip(H,X) stops after eight pairs, so H shrinks back to the state.
while M:j=0;a,b,c,d,e,f,g,h=H;exec"H+=int(M[:32],2),;M=M[32:];"*16+"x=H[-15];y=H[-2];H+=(H[-16]+H[-7]+(r(y,17)^r(y,19)^y>>10)+(r(x,7)^r(x,18)^x/8))%Q,;"*48+"u=(r(e,6)^r(e,11)^r(e,25))+(e&f^~e&g)+h+G(1/3.)[j]+H[j+8];X=a,b,c,d,e,f,g,h=(u+(r(a,2)^r(a,13)^r(a,22))+(a&b^a&c^b&c))%Q,a,b,c,(d+u)%Q,e,f,g;j+=1;"*64;H=tuple(a+b&Q-1for a,b in zip(H,X))
print"%08x"*8%H

I was working off the pseudocode, but some parts ended up the same as the golfed reference Mego posted, since there's not much to golf there (e.g. the constant tables, for which the only real golf was using <2 instead of ==1). That's still over 100 bytes saved, and I'm sure there's more to be gotten.

As with the other Python answer, input is a string of bits and output is a hex string.

Python 2, 633 bytes

n=range
f=2**32
q=512
r=lambda v,b:v%f>>b|(v<<32-b)%f
t=int
g=lambda e:[t(x**e%1*f)for x in n(2,312)if 383**~-x%x==1]
h=g(.5)
k=g(1/3.)
m=map(t,input())
l=len(m)
m+=[1]+[0]*((447-l)%q)+map(t,'{:064b}'.format(l))
for i in n(l/q+1):
 c=m[q*i:][:q];w=[t(`c[j*32:][:32]`[1::3],2) for j in n(16)];x=h[:8]
 for j in n(48):a,o=w[j+1],w[j+14];w+=[(w[j]+(r(a,7)^r(a,18)^(a>>3))+w[j+9]+(r(o,17)^r(o,19)^(o>>10)))%f]
 for j in n(64):a,o=x[::4];d=x[7]+(r(o,6)^r(o,11)^r(o,25))+(o&x[5]^~o&x[6])+k[j]+w[j];e=(r(a,2)^r(a,13)^r(a,22))+(x[1]&a|x[2]&a|x[1]&x[2]);x=[d+e]+x[:7];x[4]+=d
 h=[(H+W)%f for H,W in zip(h,x)]
print''.join('%08x'%H for H in h)

This solution is the result of a collaboration between myself, Leaky Nun, and Mars Ultor. As such, I've made it community wiki out of fairness. It takes input as a binary string wrapped in quotes (e.g. '011000010110001001100011' for abc) and outputs a hex string.