Université Toulouse III - Paul Sabatier
PROJECT
REPORT
“Trying to decipher Vigenere”
MASTER 1
SECURITY
COMPUTER SCIENCE FOR AEROSPACE
FACULTY OF SCIENCES AND ENGINEERING
Submitted to: Submitted by:
Pamit DUGGAL
TOULOUSE, 2019.
The Python Code (It reads from a text file)
import re, math
# Positions of the space characters in the text currently being processed.
# The script fills this list before spaces are stripped from the text, and
# getPassword reads it to put the spaces back into the deciphered output.
spaces = []
# To get the frequency counter for each letter
def getFrequencyList(test):
    """Count how often each lowercase letter 'a'..'z' occurs in ``test``.

    Args:
        test: Iterable of single characters (normally a lowercase string).

    Returns:
        A list of 26 integers; index 0 holds the count of 'a' and index 25
        the count of 'z'.  Characters outside 'a'..'z' are ignored.
    """
    F = [0] * 26
    k = ord('a')
    for c in test:
        idx = ord(c) - k
        # Only count lowercase ASCII letters.  Without this check an index
        # in -26..-1 silently wraps around (e.g. '_' would be counted as
        # 'y') and anything further out raises IndexError (e.g. digits,
        # uppercase or accented letters).
        if 0 <= idx < 26:
            F[idx] += 1
    return F
# To compute the index of coincidence
def indexCo_eng(test):
    """Return the index of coincidence of ``test``.

    The value is sum(f * (f - 1)) / (N * (N - 1)), where the f are the 26
    letter counts returned by getFrequencyList and N is len(test).

    Args:
        test: The text to analyse (a string of lowercase letters).

    Returns:
        The index of coincidence as a float, or 0.0 when ``test`` has fewer
        than two characters (the formula is undefined there).
    """
    N = len(test)
    if N < 2:
        # N * (N - 1) would be 0; avoid a ZeroDivisionError on very short
        # (sub)sequences.
        return 0.0
    F = getFrequencyList(test)
    tmp = sum(f * (f - 1) for f in F)
    return 1 / (N * (N - 1)) * tmp
# To split a text sequence into n subsequences
def split_by_n(seq, n):
    """Split ``seq`` into ``n`` interleaved subsequences.

    Subsequence ``i`` holds the characters found at positions
    i, i + n, i + 2n, ... of ``seq``.

    Args:
        seq: The text to split (a string).
        n: Number of subsequences to produce.

    Returns:
        A list of ``n`` strings.  Trailing entries are empty strings when
        ``n`` exceeds ``len(seq)``; the list is empty when ``n`` <= 0.
    """
    # Extended slicing replaces the manual stride loop and its repeated
    # string concatenation; join() keeps every entry a str.
    return ["".join(seq[i::n]) for i in range(n)]
def get_Keylength(test):
    """Estimate the Vigenere key length of ``test`` and decipher the text.

    For every candidate key length from 2 to 9 the text is split into that
    many interleaved subsequences (split_by_n) and the average index of
    coincidence of those subsequences (indexCo_eng) is computed.  The first
    candidate whose average lies strictly between 0.059 and 0.07 is
    reported, handed to getPassword together with its subsequences, and the
    search stops.  Every rejected candidate is printed with its average.

    Args:
        test: The ciphertext; the script passes lowercase text with the
            spaces removed.

    Returns:
        The accepted key length, or None when no candidate qualified.
    """
    for i in range(2, 10):
        seqi = split_by_n(test, i)
        seqAvg = sum(indexCo_eng(s) for s in seqi) / len(seqi)
        if 0.059 < seqAvg < 0.07:
            print("key is probably of length %d ( or 2x%d ) with avg ic: %f"
                  % (i, i, seqAvg))
            getPassword(seqi, i)
            return i
        print('Key length %d with avg ic of %f' % (i, seqAvg))
    return None
def getPassword(li, key):
    """Decipher the subsequences in ``li`` by frequency analysis and print
    the resulting text.

    Each subsequence is tried under all 26 letter shifts (increment_str);
    the shift whose letter counts give the smallest chi-squared statistic
    against the English letter frequencies is kept.  The best versions of
    the subsequences are then interleaved back into one text, the spaces
    recorded in the module-level ``spaces`` list are re-inserted, and the
    result is printed.

    Args:
        li: List of interleaved subsequences of the ciphertext, as produced
            by split_by_n.
        key: The key length, i.e. the number of subsequences in ``li``.

    Returns:
        The deciphered text that was printed.
    """
    # Relative frequencies of the letters 'a'..'z' in English text.
    prop = [
        0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015,
        0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749,
        0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758,
        0.00978, 0.02360, 0.00150, 0.01974, 0.00074,
    ]
    res = []
    leng = 0
    for test in li:
        N = len(test)
        leng += N
        if N == 0:
            # Nothing to analyse; the expected counts would all be 0 and
            # the chi-squared term would divide by zero.
            res.append(test)
            continue
        # Expected letter counts depend only on N, not on the shift.
        expected = [p * N for p in prop]
        mini = math.inf
        best = test
        for _ in range(26):
            F = getFrequencyList(test)
            count = 0
            for i in range(26):
                count += ((F[i] - expected[i]) ** 2) / expected[i]
            if count < mini:
                mini = count
                best = test
            # Move on to the next shift of this subsequence.
            test = increment_str(test)
        res.append(best)
    # Character i of the text is character i // key of subsequence i % key.
    tmp = [res[i % key][i // key] for i in range(leng)]
    # ``spaces`` holds ascending positions in the spaced text, so inserting
    # in that order puts every space back where it was.
    for s in spaces:
        tmp.insert(s, ' ')
    out = ''.join(tmp)
    print(out)
    return out
def increment_str(test):
    """Shift every character of ``test`` one step forward in the alphabet.

    'z' wraps around to 'a'; every other character is replaced by the
    character with the next code point.

    Args:
        test: The string to shift (lowercase letters in this program).

    Returns:
        A new string of the same length with each character shifted by one.
    """
    return "".join('a' if ch == 'z' else chr(ord(ch) + 1) for ch in test)
# Decipher each input file in turn.  Both runs are identical apart from the
# file name, so they share one loop body.
for file_no, path in enumerate(('data.txt', 'data2.txt')):
    if file_no:
        print('*'*100)  ## separator between two runs
    with open(path, 'r') as myfile:
        data = myfile.read()
    print("chipher text:")
    print(data)
    print('*'*100)
    data = data.replace('\n', '')  ## remove return to line from text
    # Replace everything that is not an ASCII letter by a space.  The class
    # \w would keep digits, '_' and non-ASCII letters, none of which map to
    # an index 0..25 in the letter-frequency count.
    data = re.sub(r'[^a-zA-Z]', ' ', data)
    # Remember where the spaces are so getPassword can re-insert them.
    spaces = [pos for pos, ch in enumerate(data) if ch == ' ']
    data = data.replace(' ', '')  ## remove any spaces from text
    data = data.lower()  ## text in small letters
    get_Keylength(data)
Results
1. Display of the cipher text:
2. Deciphering and displaying the text
From key lengths 2 to 8, key length 6 (IC: 0.069546) was the closest to the
index of coincidence of the English language (IC: 0.0667). So we use the
English letter frequencies to find the key letter for the n-th of the 6
interleaved subsequences of the ciphertext (every 6th letter), where n=1,…,6.
We can see that it is not perfect, but close enough for us to decipher the
text.