86

On Linux, what is the sixth character of the password hash stored in /etc/shadow?

On my puppy style linux box, if I try to generate 100 random passwords using shuf and /dev/urandom, then the sixth character is / about half the time.

My question is not for production purpose, since I boot it up every time fresh from CD. Does this mean that my system is misconfigured or insecure in some way?

I ran file on shuf to see if it was a busybox link.

file /usr/bin/shuf

    shuf: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked (uses shared libs), for GNU/Linux 2.6.32, stripped

I don't think that shuf is a busybox link here.

ls -l /usr/bin/shuf

    -rwxr-xr-x 1 root root 41568 Mar  7  2015 /usr/bin/shuf

while

ls -l /bin/wget

    lrwxrwxrwx 1 root root 14 Apr 29 03:49 wget -> ../bin/busybox

Here is a rough idea of what I did:

# ! / b i n / b a s h
##  don't try this on any real computer
##  this is not a production script, it is just psuedo code
##  with pseudo results to illustrate a point

##  for this run of 100 ?random? passwords,
##  46 of the 6th character of the hash stored in
##  '/ect/shadow' were '/'

function is_this_really_a_random_password () {
PERHAPS_RANDOM=''
for (( Z=0 ; Z<=8 ; Z++ )) do
PERHAPS_RANDOM="$PERHAPS_RANDOM$( shuf --head-count=1 --random-source=/dev/urandom $FILE_OF_SAFE_CHARACTERS )"
done
echo "$USER_NAME:$PERHAPS_RANDOM" | chpasswd
}

rm sixth-character-often-forward-slash.txt
for (( I=1; I<=100; I++ )) do
is_this_really_a_random_password
grep --regexp=root /etc/shadow | cut --characters=-40 >> sixth-character-often-forward-slash.txt
done
    root:$5$56YsS//DE$HasM6O8y2mnXbtgeE64zK
    root:$5$ho8pk/4/A6e/m0eW$XmjA5Up.0Xig1e
    root:$5$jBQ4f.t1$vY/T/1kX8nzAEK8vQD3Bho
    root:$5$BJ44S/Hn$CsnG00z6FB5daFteS5QCYE
    root:$5$Jerqgx/96/HlV$9Wms5n1FEiM3K93A8
    root:$5$qBbPLe4zYW$/zXRDqgjbllbsjkleCTB
    root:$5$37MrD/r0AlIC40n6$8hplf2c3DgtbM1
    root:$5$.4Tt5S6F.3K7l7E$dAIZzFvvWmw2uyC
    root:$5$A4dX4ZlOoE$6axanr4GLPyhDstWsQ9B
    root:$5$HXAGhryJ/5$40tgmo7q30yW6OF7RUOE
    root:$5$EzNb9t5d$/nQEbEAQyug7Dk9X3YXCEv
    root:$5$HHS5yDeSP$LPtbJeTr0/5Z33vvw87bU
    root:$5$sDgxZwTX5Sm$6Pzcizq4NcKsWEKEL15
    root:$5$FK1du/Paf/$hAy8Xe3UQv9HIpOAtLZ2
    root:$5$xTkuy/BLUDh/N$/30sESA.5nVr1zFwI
    root:$5$PV4AX/OjZ$VU8vX651q4eUqjFWbE2b/
    root:$5$iDuK0IUGijv4l$cdGh8BlHKJLYxPB8/
    root:$5$0DEUp/jz$JBpqllXswNc0bMJA5IFgem
    root:$5$Wz3og/W3Jra/WKA.$6D7Wd4M1xxRDEp
    root:$5$ntHWB.mC3x$Kt4DNTjRZZzpbFvxpMxP
    root:$5$g/uEc/cq$Ptlgu8CXV.vrjrmuok9RRT
    root:$5$/XAHs/5x$Z9J4Zt4k6NxdjJ27PpLmTt
    root:$5$mgfbZeWD0h/$UDGz8YX.D85PzeXnd2K
    root:$5$f4Oh3/bF2Ox/eN$xt/Jkn0LxPnfKP8.
    root:$5$J0mZZXGJG7/v$e16VxghNvZZKRONown
    root:$5$SNza9XFl9i$Qq7r/N6Knt2j74no8H0x
    root:$5$aFCu//xiL$Ocn9mcT2izcnm3rUlBOJg
    root:$5$kMkyos/SLZ/Mm6$wNYxZ9QeuJ8c8T.o
    root:$5$ujXKC/Xnj0h/nQ$PUmePvJZr.UXmTGK
    root:$5$wtEhA/YKaTKH$6VCSXUiIdsfelkCYWV
    root:$5$I1taRlq59YZUGe$4OyIfByuvJeuwsjM
    root:$5$N54oH//j4nbiB$K4i6QOiS9iaaX.RiD
    root:$5$ps8bo/VjPGMP0y4$NTFkI6OeaMAQL7w
    root:$5$IRUXnXO8tSykA8$NatM5X/kKHHgtDLt
    root:$5$VaOgL/8V$m45M9glUYnlTKk8uCI7b5P
    root:$5$/lPDb/kUX73/F3$jJL.QLH5o9Ue9pVa
    root:$5$/sHNL/tVzuu//cr$QasvQxa02sXAHOl
    root:$5$hGI.SMi/7I$fYm0rZP0F5B2D1YezqtX
    root:$5$WsW2iENKA$4HhotPoLRc8ZbBVg4Z5QW
    root:$5$cN6mwqEl$q5S3U85cRuNHrlxS9Tl/PC
    root:$5$wwzLR/YMvk5/7ldQ$s3BJhq5LyrtZww
    root:$5$GUNvr/d15n8/K$CiNHwOkAtxuWJeNy1
    root:$5$nGE75/8mEjM/A$pD/84iLunN/ZNI/JK
    root:$5$77Dn2dHLS$d5bUQhTz.OU4UA.67IGMB
    root:$5$EWrI//1u$uubkPk3YhAnwYXOYsvwbah
    root:$5$Hzfw1UCudP/N/U$Rjcdzdbov1YgozSJ
    root:$5$2y8CKTj.2eTq$7BEIgMWIzAJLl1SWBv
    root:$5$lcWsD/42g8zEEABA$r/vGxqqUZTkJ0V
    root:$5$LPJLc/Xz$tnfDgJh7BsAT1ikpn21l76
    root:$5$ucvPeKw9eq8a$vTneH.4XasgBIeyGSA
    root:$5$Fwm2eUR7$ByjuLJRHoIFWnHtvayragS
    root:$5$yBl7BtMb$KlWGwBL6/WjgHVwXQh9fJS
    root:$5$1lnnh2kOG$rdTLjJsSpC3Iw4Y6nkPhq
    root:$5$WfvmP6cSfb066Z$1WvaC9iL11bPCAxa
    root:$5$qmf/hHvalWa4GE25$m3O2pdu25QBCwU
    root:$5$4P.oT/9HQ$Ygid4WXi0QCEObLVNsqFZ
    root:$5$FNr4Bkj56Y$38mG7mKV0mdb1PMCxrVd
    root:$5$hoNcyURtV$aTidBWHjngc1I0vUTi5bB
    root:$5$rzHmykYT$ATiXdUDUvUnB2fNMUQgwvE
    root:$5$o11Yb/ZQv2/k3wg9$5yShpVejDBk6HB
    root:$5$REPGN//y9H$awpPmUvCqvi6Bd/6bQxF
    root:$5$HbAEY/djXJx$y56GhMwavd7xTQ.jPg6
    root:$5$3T1k5.LZUcy$Cup.LM5AnaBTIaJtBnF
    root:$5$wXaSC/P8bJ$y/0DoYJVjaP09O6GWiki
    root:$5$YuFfY8QPqm/dD$IIh0/tyn.18xEBl5Y
    root:$5$uTTBpjsKG//3Et8$9ibN9mVwSeVyOI4
    root:$5$dASlMLzbVbFMnZ$N4uGBwGHhdg93z/V
    root:$5$03.FA/LnRBb.k7Zl$XOHU2ZlHkV9oz9
    root:$5$2zL1p/VDCi$/QRT7Bo3cZ3Rxb8Y7ddo
    root:$5$0NpZqZs/qt/jIv.$8W/TTM3Gy2UMOWy
    root:$5$a4SXynoro7ucT$qFM2C79QJ15jQ0ZlL
    root:$5$RL0Eg/jroH8/ONP$EzceXz.pz74k104
    root:$5$O3R5V/n1$U.mmCTbpID8xMXbvtzd4ch
    root:$5$0T2nVrv/P/xaRwUD$YVm17XF8kTsL0f
    root:$5$2bRwMNIXobZwn$Q228FJqg6/iRCe9GQ
    root:$5$PyYgL/axfgj/$uaL5y/kdzU4Kzi.JlB
    root:$5$A6QtfJdJ4Gwvx4$d4PA5AJ0806NzRnm
    root:$5$H8Mta5LDgGXp$QGdOJh.bFWgR3L719Z
    root:$5$H06URjv4BtOAbA$EJs1mZYhdKIVgCmn
    root:$5$OeB.O/GrmFB/az$SoE759KE9WIE17Uf
    root:$5$huiB9/sk$el3XMf7SGX81LnD3.SaF8J
    root:$5$fO7tfM.fjdSHA8G6$s.QIjfNniCzFdU
    root:$5$32at3SQJAD/xlw$HbXmBLVXTTyZfxQv
    root:$5$FHBFL/QdFl$FMipxpW0HlEFUIAr7IxF
    root:$5$sHvKf/M5OPdBuZZ$dz4qLOkTLGeCINX
    root:$5$hw4Vu/e34$/82lXu7ISrse.Ihk.qbqT
    root:$5$k1JOy/jRWZ$30YSk7kbhdKOjfDaiWVf
    root:$5$MnX.LUzqrB/B2$JuwqC.SmKFnMUWkEf
    root:$5$arRYf/PG$Xw6PpZNFO656p.Eb636iLt
    root:$5$5op/p8Hqs5$Nj2jA0Qxm80aG4fHW3oz
    root:$5$VHIT9/8yzZ$CpIK4ODps78GcqcsgiMT
    root:$5$.AlH7jBJoh/8$sjuVt.PcRH.vyvB3og
    root:$5$f7Ewinqm$nrJ2p/hKTuiEK//IfCTjth
    root:$5$N.dv/VCvrCADg$peSXfo35KN1dmbw/n
    root:$5$PSc4W./54l/SroH$CFFVOHRYK.Jj8Sp
    root:$5$8UBP3f4IcnAd/N1/$P.ud49qTStQ7Lw
    root:$5$qnXsZ/NlLZh/$nlaQVTS3FCJg1Jb2QG
    root:$5$xOpbbBqENR/7$boYJQzkCkZhRf7Uicf
    root:$5$V93tjZhzT$LrsIZWZmYo4ocRUvCixO6
    root:$5$1MVz8/lf5oC/$rUKpnX23MhFx4.y2ZS

Roughly half of the 6th hash characters are /:

cat sixth-character-often-forward-slash.txt | cut --character=14 | sort


    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    /
    .
    .
    .
    .
    2
    5
    6
    8
    8
    B
    d
    D
    e
    e
    E
    f
    H
    I
    j
    j
    j
    J
    k
    k
    K
    l
    L
    M
    M
    n
    n
    N
    q
    r
    r
    r
    s
    S
    S
    t
    t
    T
    U
    U
    U
    U
    V
    w
    x
    X
    X
    X
    Z
    Z
    Z
insecure
  • 761
  • 3
    man 3 crypt and read the NOTES section for a full description of this field. – Stephen Harris Aug 04 '16 at 16:54
  • (Left as a hint for someone who wants to fully investigate): There are 64 characters used in the password field; so likely some base64 variant is used. So I'd guess what you're seeing is base64 padding, but it's odd that it's not at the end of the salt then... – derobert Aug 04 '16 at 17:33
  • 1
    With Debian testing & mkpasswd, this doesn't happen—curious if it happens with mkpasswd for you (as that'd be easier to test than actually setting the root password): for ((i=0; i<50; ++i)); do pwgen -1 -s 16 | mkpasswd -m sha-256 --stdin ; done | cut -c9 | sort | uniq -c – derobert Aug 04 '16 at 17:42
  • I can't reproduce this on current Ubuntu 14.04 or 16.04. Slash is near the middle of the frequency list (used @derobert snippet above with 5000 loops instead of 50) but it still looks a bit non-uniform, Most frequent char (q) @ 2.00% of total is 1.63 more frequent than the least frequent one (r) @ 1.22% of total. – arielf Aug 08 '16 at 18:55
  • @arielf I doubt that is a statistically-significant. You'd have to do something like a χ2 test to be sure, but that's well outside [unix.se] territory and getting into [stats.se] territory. – derobert Aug 09 '16 at 18:10
  • Yes, you are right. Here's a chart of the distribution: https://imgur.com/a/BsQE3 - looks pretty uniform with the deviation from the average "close enough" to looking gaussian. – arielf Aug 10 '16 at 06:20

3 Answers3

88

Hash format and source

The format of the password hash is $<type>$<salt>$<hash>, where <type> 5 is an SHA-256 based hash. The salt is usually at least 8 characters, (and is in the examples in the question) so the sixth character is part of the salt.

Those hashes are likely generated by a version of the shadow tool suite (src package shadow in Debian, shadow-utils in CentOS)

I tried to find out why, exactly, the code biases the slash. (thanks to @thrig for originally digging up the code.)

TLDR: It's a bit interesting, but doesn't matter.


The code generating the salt

In libmisc/salt.c, we find the gensalt function that calls l64a in a loop:

strcat (salt, l64a (random()));
do {
       strcat (salt, l64a (random()));
} while (strlen (salt) < salt_size);

The loop takes a random number from random(), turns it into a piece of a string, and concatenates that to the string forming the salt. Repeat until enough characters are collected.

What happens in l64a is more interesting though. The inner loop generates one character at a time from the input value (which came from random()):

for (i = 0; value != 0 && i < 6; i++) {
    digit = value & 0x3f;

    if (digit < 2) {
        *s = digit + '.';
    } else if (digit < 12) {
        *s = digit + '0' - 2;
    } else if (digit < 38) {
        *s = digit + 'A' - 12;
    } else {
        *s = digit + 'a' - 38;
    }

    value >>= 6;
    s++;
}

The first line of the loop (digit = value & 0x3f) picks six bits from the input value, and the if clauses turn the value formed by those into a character. (. for zero, / for a one, 0 for a two, etc.)

l64a takes a long but the values output by random() are limited to RAND_MAX, which appears to be 2147483647 or 2^31 - 1 on glibc. So, the value that goes to l64a is a random number of 31 bits. By taking 6 bits at a time or a 31 bit value, we get five reasonably evenly distributed characters, plus a sixth that only comes from one bit!

The last character generated by l64a cannot be a ., however, since the loop also has the condition value != 0, and instead of a . as sixth character, l64a returns only five characters. Hence, half the time, the sixth character is a /, and half the time l64a returns five or fewer characters. In the latter case, a following l64a can also generate a slash in the first positions, so in a full salt, the sixth character should be a slash a bit more than half the time.

The code also has a function to randomize the length of the salt, it's 8 to 16 bytes. The same bias for the slash character happens also with further calls to l64a which would cause the 11th and 12th character to also have a slash more often than anything else. The 100 salts presented in the question have 46 slashes in the sixth position, and 13 and 15 in the 11th and 12th position, respectively. (a bit less than half of the salts are shorter than 11 characters).

On Debian

On Debian, I couldn't reproduce this with a straight chpasswd as shown in the question. But chpasswd -c SHA256 shows the same behaviour. According to the manual, the default action, without -c, is to let PAM handle the hashing, so apparently PAM on Debian at least uses a different code to generate the salt. I didn't look at the PAM code on any distribution, however.

(The previous version of this answer stated the effect didn't appear on Debian. That wasn't correct.)

Significance, and requirements for salts

Does it matter, though? As @RemcoGerlich commented, it's pretty much only a question of encoding. It will effectively fix some bits of the salt to zero, but it's likely that this will have no significant effect in this case, since the origin of those bits is this call to srandom in seedRNG:

srandom (tv.tv_sec ^ tv.tv_usec ^ getpid ());

This is a variant of ye olde custom of seeding an RNG with the current time. (tv_sec and tv_usec are the seconds and microseconds of the current time, getpid() gives the process id if the running process.) As the time and PIDs are not very unpredictable, the amount of randomness here is likely not larger than what the encoding can hold.

The time and PID is not something you'd like to create keys with, but might be unpredictable enough for salts. Salts must be distinct to prevent brute-force testing multiple password hashes with a single calculation, but should also be unpredictable, to prevent or slow down targeted precomputation, which could be used to shorten the time from getting the password hashes to getting the actual passwords.

Even with the slight issues, as long as the algorithm doesn't generate the same salt for different passwords, it should be fine. And it doesn't seem to, even when generating a couple dozen in a loop, as the list in the question shows.

Also, the code in question isn't used for anything but generating salts for passwords, so there are no implications about problems elsewhere.

For salts, see also, e.g. this on Stack Overflow and this on security.SE.

Conclusion

In conclusion, there's nothing wrong with your system. Making sure your passwords are any good, and not used on unrelated systems is more useful to think about.

ilkkachu
  • 138,973
  • 2
    So the actual salts aren't biased, it's just an artifact of how they are encoded and it doesn't have any security implications, is that correct? – RemcoGerlich Aug 05 '16 at 06:51
  • 9
    @RemcoGerlich This is the textbook definition of a bias. Because not all the bits are being uniformly accounted for. This also does have implications in other projects where this code is used in a non-salt context. As salts in passwords for /etc/shadow, it's not a showstopper, but it is concern. – Aaron Toponce Aug 05 '16 at 16:52
  • @RemcoGerlich, Pretty much yes. Okay, that's not a strong RNG, so we could talk about biases in that. But security-wise it doesn't matter for a salt. – ilkkachu Aug 06 '16 at 11:40
  • 4
    You've misinterpreted the security.SE post that you linked to, and the accepted answer to the SO post you linked to is wrong, which is why there is another answer with more than 10x the votes contradicting it. The statement that "salts only need to be distinct" isn't true; the entropy of a salt matters, because that's what controls by how much it increases the difficulty of precomputation. These salts have less entropy than they could for their length, due to the bias. Not critically less, but something like 5 bits less. It is a flaw. – hobbs Aug 07 '16 at 07:34
  • Perhaps someone should make a security.SE question specifically referencing this function to get a qualified opinion. – hobbs Aug 07 '16 at 07:42
  • @hobbs, well, yes, I did gloss over that. You're right about the possibility of precomputing if the salt is known, it is actually mentioned in one of the security.SE answers. Though even with known (but distinct) salts, the precomputation would need to be done for each hash separately, preventing parallel cracking, which I understand is pretty much the main point. I'll go and edit that (but not right now, I need more coffee). Thanks for the comment! – ilkkachu Aug 07 '16 at 10:33
  • @hobbs, As for the loss of entropy within the encoding, I didn't think it mattered since the source of all the "randomness" is from the seed given to srandom, i.e. the current time and PID, which might not be that much, since the account creation time may well be guessable. Though giving 16 unknown bits for the PID and 20 for microseconds (full-range) we'd start to be close. But I'd rather not go too deep in estimating that entropy... – ilkkachu Aug 07 '16 at 10:34
  • @hobbs, edited... – ilkkachu Aug 08 '16 at 11:51
27

That character is part of the salt per the crypt(3) manual. Given that the length of the salt (the string between the $5$ ID and subsequent $) varies for the exhibited hashes, I'm not exactly sure what picking a random character from that particular column for a few passwords illustrates.

On the other hand, the / is rather more prevalent (102 instances) in the entire salt compared to the other possible characters (around 18), so something in chpasswd does appear to favor that character in the salt;

for x in `seq 1 100000`; do
  echo testacct:asdfasdfasdf | chpasswd -c SHA256
  awk -F: '/testacct/{print $2}' /etc/shadow | awk -F\$ '{print $3}' >> salts
done
perl -nle 'print for m/(.)/g' salts | sort | uniq -c | sort -nr | head -5

on a RedHat EL 6 system run turns up:

   1006 /
    195 X
    193 U
    193 q
    193 e

And, yes, the code in shadow-utils-4.1.5.1-5.el6 exhibits a bias towards / which might make dictionary attacks easier:

#include <sys/time.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// these next two borrowed from libmisc/salt.c of shadow-4.1.5.1 from
// Centos 6.8 RPM at http://vault.centos.org/6.8/os/Source/SPackages/shadow-utils-4.1.5.1-5.el6.src.rpm
char *l64a(long value)
{
    static char buf[8];
    char *s = buf;
    int digit;
    int i;

    if (value < 0) {
        abort();
    }

    for (i = 0; value != 0 && i < 6; i++) {
        digit = value & 0x3f;

        if (digit < 2) {
            *s = digit + '.';
        } else if (digit < 12) {
            *s = digit + '0' - 2;
        } else if (digit < 38) {
            *s = digit + 'A' - 12;
        } else {
            *s = digit + 'a' - 38;
        }

        value >>= 6;
        s++;
    }

    *s = '\0';

    return (buf);
}

static void seedRNG(void)
{
    struct timeval tv;
    static int seeded = 0;

    if (0 == seeded) {
        (void) gettimeofday(&tv, NULL);
        srandom(tv.tv_sec ^ tv.tv_usec ^ getpid());
        seeded = 1;
    }
}

int main(void)
{
    seedRNG();
    for (int x = 0; x < 1000; x++) {
        printf("%s\n", l64a(random()));
    }

    exit(0);
}

Which results in:

% ./salttest | perl -nle 'print for m/(.)/g' | sort | uniq -c | sort -nr | head -3
 593 /
  96 8
  93 3

And then using the same routines from the latest https://github.com/shadow-maint/shadow/blob/master/libmisc/salt.c we find that there's still a bias towards /. So uh yeah this is a bug that should be patched so / is not favored so much, as ideally the salt characters should be equally weighted.

thrig
  • 34,938
  • I guess the OP could see this as evidence that the salt might be insufficiently random. – Ulrich Schwarz Aug 04 '16 at 17:24
  • in the sixth column, the characters selected from [a-zA-Z0-9./] seem to be skewed. 46 out of 100 are '/'. all the other characters from the set are only used a few times. – insecure Aug 04 '16 at 17:27
  • Busybox uses a linear congruency generator, it seems: https://git.busybox.net/busybox/plain/libbb/crypt_make_salt.c?h=1_6_stable I wonder if the lots of 1-bits (which would lead to / in the base64-coding) stem from the general size of time() right now, because there's not really much entropy going in there. – Ulrich Schwarz Aug 04 '16 at 17:48
  • That is some shockingly bad code, especially considering the security-sensitive nature. Suggest RedHat post it to [codereview.se] :-/ – derobert Aug 04 '16 at 21:01
  • 14
    Biases in the salt are not in themselves harmful (unlike biases in, say, a key). The salt only needs to be unique, it doesn't need to be unpredictable. A salt consisting of a MAC address (or something uniquely identifying the machine) and the time (assuming the clock doesn't go backward) would be fine. The statement that “ideally the salt characters should be equally weighted” is wrong. – Gilles 'SO- stop being evil' Aug 04 '16 at 22:50
  • @Gilles disagree; a predictable salt makes dictionary attacks feasible or more likely, depending on how bad the bias is; one based on the MAC address and time is horrible, as the MAC address could easily be leaked via IPv6, and the password change time known via other channels, and then a dictionary attack made that much more simpler when the password hashes are carried off. A uniform distribution of all the salt characters offers no such weaknesses. – thrig Aug 04 '16 at 23:25
  • 7
    @thrig No, a predictable salt does not help with dictionary attacks, because a salt does not help with dictionary attacks as such. A salt helps with attacks that target multiple accounts (more precisely: multiple hashes — also successive hashes on the same account), and for this, all that matters is that the salt is distinct for the different accounts. The unpredictability of salts is irrelevant, only their uniqueness (actually, even a low repeat count is good enough). – Gilles 'SO- stop being evil' Aug 05 '16 at 00:00
  • 2
    (Ok, there is a very slight advantage if the salt is known, which is that the attacker can start breaking passwords before they find the hash, but that's marginal, and even a poorly distributed salt is sufficient in that respect.) See http://security.stackexchange.com/questions/211/how-to-securely-hash-passwords , http://security.stackexchange.com/questions/17421/how-to-store-salt and many other threads on [security.se]. – Gilles 'SO- stop being evil' Aug 05 '16 at 00:02
  • 3
    The key point of what Gilles is saying is that there's a lot of room for a salt generator to be poor but not nearly so bad that there are actual collisions in the same shadow file (or across multiple systems that an attacker might be attacking at once). This is all that matters for salt to work. It only takes a bit of randomness to defeat Rainbow tables. – Peter Cordes Aug 05 '16 at 02:49
  • 8
    However, if the salt is badly generated, it doesn't inspire confidence in the rest of the crypto code. – user253751 Aug 05 '16 at 06:29
  • 4
    With salts, they need to be globally unique. They don't need to be random, and they don't need to be secret. But they do need to be globally unique. It turns out, this is harder to do if you try to increment a counter, or create some fancy deterministic algorithm, than just grabbing random bits from the OS RNG. If you generate 16 base64 characters randomly, then you have a n/64^16 chance of collisions. Of course, the whole point of salts, is to make rainbow table attacks fruitless. In this case, a 16-character base64 salt space would be 64^15 < n < 64^16. Not a showstopper, but easily fixed. – Aaron Toponce Aug 05 '16 at 17:04
  • @AaronToponce I would not waste my precious pool of entropy generating each and every salt. Instead, one time put a few bytes of true randomness in a config file readable only by root, and each time a user is added to the system, take that hidden random blob, append both the username and numeric UID, time_t of the password-setting operation, and the PID of the process doing it; then run the result through the same hash function that will subsequently be used to hash the salted password (since it has to be in the routine anyway) to produce the salt. Guaranteed unique/unpredictable – Monty Harder Aug 05 '16 at 18:21
  • @immibis This salt is only badly generated if you assume it has to be unpredictable. Suppose that the authors knew what they were doing and expended the right amount of effort in the salt-generation routine. This code would conform with that theory. – user394 Aug 05 '16 at 18:32
  • 4
    @MontyHarder Entropy isn't a type of gasoline- you don't consume it, just like you don't consume temperature. It's a guess at the unpredictable state of your machine, just like temperature is a guess at the energy of gas molecules in a closed volume. Regardless, you should be pulling from /dev/urandom, which after properly seeded, is indistinguishable from true random noise. Also, rolling your own design rather than grabbing data from the RNG is almost guaranteeing to shoot yourself in the foot. Just use /dev/urandom. – Aaron Toponce Aug 05 '16 at 19:32
  • 1
    @user394 My point is: The salt is a random number. They managed to screw up generating random numbers. Generating random numbers is not difficult. What else did they screw up? – user253751 Aug 05 '16 at 23:40
  • @immibis I don't know the ins and outs of encryption, but people here are pointing out that the salt /is not/ a random number-- it need not be, and from what layprogrammer's understanding I have, their arguments make sense. So I don't see how they screwed up making a unique number. You seem to be holding them to an over-engineered armchair standard. – user394 Aug 06 '16 at 02:13
  • @derobert OT, but what is shockingly bad about that code? Just the worst thing briefly would be good, I'm not looking to debate. – hyde Aug 06 '16 at 16:01
  • @hyde on x86_64/amd64 (i.e., almost everyone's machine), long is 64 bits. It's a good thing RAND_MAX happens to be 2^31-1, because if it actually returned a long worth of random, that function would buffer overflow buf[8]. I guess that's not really shockingly bad... But still I'd expect better, especially in security-sensitive code. (Also, would have to work out the "entropy" in the srand vs. the amount of salt generated, might have a higher risk of collision than first appears, especially for creating a bunch of accounts at once) – derobert Aug 06 '16 at 16:11
  • @Gilles the statement "ideally the characters should be equally weighted" isn't wrong. A bias in the salt means advance knowledge that some salts are more likely than others, which means an advantage in a pre-computation attack. If it was a large bias, then it would be a huge advantage (imagine if every character of the salt was / 50% of the time, you would have a reasonably small number of salts you could pre-hash with and stand a chance of breaking some passwords), but since this is only a small bias it's fairly insignificant... but definitely "non-ideal". – hobbs Aug 07 '16 at 07:11
  • @derobert, the loop tests for i < 6, so it won't overflow. – ilkkachu Aug 07 '16 at 10:39
  • @derobert, also, yeah, if you started password creation operations with successive PIDs that ran one microsecond apart, you'd get the same salt for each one of them. Though I guess the code isn't meant for someone creating millions of accounts in a short time (and anyone doing that should think a bit about their tools anyway). As for creating a couple of accounts in a shell loop, the salts seem to at least be distinct. (and if one of them is unknown in advance, so are all) – ilkkachu Aug 07 '16 at 10:43
  • @ilkkachu ah, you're right, it does. I missed that. – derobert Aug 07 '16 at 11:21
5

mkpasswd(1) might be a front-end to crypt(3), but it's not the same as running chpasswd(1), which is part of the "shadow-utils" package on CentOS and "passwd" on Debian. Instead, you should compare apples-to-apples. Consider the following script:

#!/bin/bash

# This repeatedly changes a `saltuser' password
# and grabs the salt out of /etc/shadow.
# Requires root and the existence of `saltuser' user.

if [ $EUID -ne 0 ]; then
    echo "This script requires root access to read /etc/shadow."
    exit 1
fi

grep -q saltuser /etc/passwd

if [ $? -ne 0 ]; then
    echo "This script requires the 'saltuser' to be present."
    exit 2
fi

: > /tmp/salts.txt

for i in {1..1000}; do
    PW=$(tr -cd '[[:print:]]' < /dev/urandom | head -c 64)
    echo "saltuser:${PW}" | chpasswd -c SHA256 -s 0 2> /dev/urandom
    awk -F '$' '/^saltuser/ {print $3}' /etc/shadow >> /tmp/salts.txt
done

while read LINE; do
    # 6th character in the salt
    echo ${LINE:5:1}
done < /tmp/salts.txt | sort | uniq -c | sort -rn

Output from Debian Sid:

512 /
 14 T
 13 W
 13 v
 13 t
 12 x
 12 m
 12 d
 11 p
 11 L
 11 F
 11 4
 10 s
 10 l
 10 g
 10 f
 10 7
 10 6
  9 Z
  9 w
  9 N
  9 H
  9 G
  9 E
  9 A
  8 Y
  8 X
  8 r
  8 O
  8 j
  8 c
  8 B
  8 b
  8 9
  7 u
  7 R
  7 q
  7 P
  7 M
  7 k
  7 D
  6 z
  6 y
  6 U
  6 S
  6 K
  6 5
  5 V
  5 Q
  5 o
  5 J
  5 I
  5 i
  5 C
  5 a
  5 3
  4 n
  4 h
  4 e
  4 2
  4 0
  4 .
  3 8
  3 1

Output from CentOS 7:

504 /
 13 P
 13 B
 12 s
 12 Z
 11 e
 11 Y
 11 O
 11 L
 11 G
 10 w
 10 u
 10 q
 10 i
 10 h
 10 X
 10 I
 10 E
  9 x
  9 g
  9 f
  9 W
  9 F
  9 C
  9 9
  9 8
  8 v
  8 t
  8 c
  8 b
  8 S
  8 H
  8 D
  8 0
  7 z
  7 y
  7 o
  7 k
  7 U
  7 T
  7 R
  7 M
  7 A
  7 6
  7 4
  7 1
  6 p
  6 d
  6 a
  6 Q
  6 J
  6 5
  6 .
  5 r
  5 m
  5 j
  5 V
  5 3
  5 2
  4 n
  4 l
  4 N
  4 K
  3 7

So, the problem isn't unique to CentOS, but likely coming from upstream where both projects are pulling from.

  • Is : > /tmp/salts.txt the same as touch /tmp/salts.txt? : is a NOP, right? – someonewithpc Aug 05 '16 at 21:28
  • 2
    @someonewithpc It's the POSIX way for emptying a file. touch(1) creates a file if it doesn't exist, but just updates the modified timestamp if it does exist. It's not the correct way to empty a file. : > file will guarantee it exists and that it's empty. – Aaron Toponce Aug 05 '16 at 23:05
  • @Aaron, yes, you are absolutely correct. It's not completely specific to one distribution. – ilkkachu Aug 06 '16 at 10:46