# Short analysis of the 10 million PW file that was fortunately released by
# Mark Burnett into the public.
# For details see https://xato.net/passwords/ten-million-passwords
#
# Rainhard Findling
# u'smile.at Josef Ressel Center for User-friendly Secure Mobile Environments
# 2015/02
#
# Input:  '10-million-combos-utf8.txt' in the working directory, tab
#         separated; the passwords are taken from the second column.
# Output: a set of SVG plots written to the working directory.

# Plot size in inches. `width` is deliberately re-assigned further down before
# each group of bar plots; the plotting functions read it at call time.
width <- 6
height <- 4

# Passwords are arbitrary text, so nothing in them may be interpreted:
#  * quote = ""                  a '"' inside a password must not open a quoted
#                                field (which could swallow following records)
#  * na.strings = character(0)   the literal password "NA" must stay a string
#                                instead of becoming a missing value
# NOTE(review): read.csv() defaults to header = TRUE, so the first line of the
# file is consumed as column names. If the file has no header line, that first
# record is silently dropped -- confirm against the data file.
data <- read.csv(
  "10-million-combos-utf8.txt",
  sep = "\t",
  colClasses = "character",
  quote = "",
  na.strings = character(0)
)

# pw length ----
# nchar() is vectorised: one integer per password, no per-element list needed.
pw_length <- nchar(data[, 2])
svg("pw_length.svg", width = width, height = height)
hist(as.numeric(pw_length), xlab = "Password length", breaks = 40)
dev.off()

# pw usage frequency ----
# Number of occurrences of every distinct password. The histogram is drawn
# over the natural log of these counts.
usage_frequency <- table(data[, 2])
svg("usage_frequency.svg", width = width, height = height)
hist(log(usage_frequency), xlab = "Password usage frequency", breaks = 20)
dev.off()

# Draw `counts` as a bar plot into the SVG file `file` and close the device.
#
# counts      named counts to plot (one bar per element)
# file        path of the SVG file to create
# title       main title of the plot
# width, height  size of the SVG in inches
# cex.names   scaling factor of the bar labels
#
# The device is closed even when barplot() fails, so that a failing plot does
# not leave an open SVG device behind. Returns the value of dev.off().
save_barplot_svg <- function(counts, file, title, width, height, cex.names) {
  svg(file, width = width, height = height)
  tryCatch(
    barplot(counts, cex.names = cex.names, main = title),
    error = function(e) {
      dev.off()
      stop(e)
    }
  )
  dev.off()
}

# most frequently used passwords ----
most_used_passwords <- usage_frequency[order(usage_frequency, decreasing = TRUE)]

# Bar plot of the passwords on popularity ranks `lower` to `upper`.
#
# lower, upper  first and last rank to plot (1 = most frequently used)
#
# Writes 'most_used_passwords_<lower>_<upper>.svg'. Reads the globals
# `most_used_passwords`, `width`, `height` and `cex.names` at call time.
# Ranks beyond the number of distinct passwords are left out instead of being
# plotted as NA bars; if no requested rank exists, a warning is raised and no
# file is written.
fun_most_used_pw_pos <- function(lower, upper) {
  n_passwords <- length(most_used_passwords)
  if (lower > n_passwords) {
    warning(
      "No passwords on ranks ", lower, " to ", upper, "; plot skipped",
      call. = FALSE
    )
    return(invisible(NULL))
  }
  ranks <- lower:min(upper, n_passwords)
  save_barplot_svg(
    most_used_passwords[ranks],
    file = paste0("most_used_passwords_", lower, "_", upper, ".svg"),
    title = paste0("Most used passwords (#", lower, " to #", upper, ")"),
    width = width,
    height = height,
    cex.names = cex.names
  )
}

width <- 9
cex.names <- 0.7
rank_ranges <- list(
  c(1, 10), c(30, 40), c(100, 110), c(300, 310), c(1000, 1010), c(3000, 3010)
)
for (rank_range in rank_ranges) {
  fun_most_used_pw_pos(rank_range[1], rank_range[2])
}

# most frequently used pws of length X ----

# Bar plot of the (up to) 10 most frequently used passwords of length `len`.
#
# len  password length in characters
#
# Writes 'most_used_passwords_len<len>.svg'. Reads the globals `data`,
# `pw_length`, `width`, `height` and `cex.names` at call time. If fewer than
# 10 distinct passwords of that length exist, only those are plotted; if there
# are none, a warning is raised and no file is written.
fun_most_used_pw <- function(len) {
  counts <- table(data[which(pw_length == len), 2])
  if (length(counts) == 0L) {
    warning("No passwords of length ", len, "; plot skipped", call. = FALSE)
    return(invisible(NULL))
  }
  counts <- counts[order(counts, decreasing = TRUE)]
  top_counts <- counts[seq_len(min(10L, length(counts)))]
  save_barplot_svg(
    top_counts,
    file = paste0("most_used_passwords_len", len, ".svg"),
    title = paste0("Most used passwords length ", len),
    width = width,
    height = height,
    cex.names = cex.names
  )
}

# Longer passwords need wider plots so that the bar labels stay readable.
width <- 11
for (len in 4:12) {
  fun_most_used_pw(len)
}
width <- 13
for (len in 13:16) {
  fun_most_used_pw(len)
}