How to encrypt text with a password in python?

Solution 1:

Here's how to do it properly in CBC mode, including PKCS#7 padding:

import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random

def encrypt(key, source, encode=True):
    """Encrypt ``source`` with AES in CBC mode using a key derived from ``key``.

    The AES key is the SHA-256 digest of ``key``; a fresh random IV is
    generated on every call and stored in front of the ciphertext. The
    plaintext is padded so its length is a multiple of ``AES.block_size``:
    every padding byte holds the padding length, and a full block of padding
    is added when the input is already block-aligned.

    Args:
        key: password/key material as bytes.
        source: plaintext as bytes. The caller's object is never modified.
        encode: when true, return the result base64-encoded as ``str``;
            otherwise return the raw ``IV + ciphertext``.

    Returns:
        ``IV + ciphertext``, base64-encoded text if ``encode`` is true.
    """
    key = SHA256.new(key).digest()  # use SHA-256 over our key to get a proper-sized AES key
    IV = Random.new().read(AES.block_size)  # generate IV
    encryptor = AES.new(key, AES.MODE_CBC, IV)
    padding = AES.block_size - len(source) % AES.block_size  # calculate needed padding
    # Build a new object instead of using ``+=``: with a bytearray argument,
    # ``+=`` would extend the caller's buffer in place.
    # Python 2.x: padded = source + chr(padding) * padding
    padded = source + bytes([padding]) * padding
    data = IV + encryptor.encrypt(padded)  # store the IV at the beginning and encrypt
    return base64.b64encode(data).decode("latin-1") if encode else data

def decrypt(key, source, decode=True):
    """Decrypt data laid out as ``IV + AES-CBC ciphertext`` and strip its padding.

    The AES key is the SHA-256 digest of ``key``. The first
    ``AES.block_size`` bytes of the input are used as the IV and the rest is
    decrypted. The last plaintext byte is read as the padding length, and
    that many trailing bytes must all equal it.

    Args:
        key: password/key material as bytes.
        source: base64 text when ``decode`` is true, otherwise raw bytes.
        decode: whether ``source`` must be base64-decoded first.

    Returns:
        The plaintext with the padding removed.

    Raises:
        ValueError: if nothing follows the IV, or if the padding is
            malformed (length outside ``1..AES.block_size`` or trailing
            bytes that do not all equal the padding length).
    """
    if decode:
        source = base64.b64decode(source.encode("latin-1"))
    key = SHA256.new(key).digest()  # use SHA-256 over our key to get a proper-sized AES key
    IV = source[:AES.block_size]  # extract the IV from the beginning
    decryptor = AES.new(key, AES.MODE_CBC, IV)
    data = decryptor.decrypt(source[AES.block_size:])  # decrypt
    if not data:
        # Without this guard, data[-1] below raises a bare IndexError.
        raise ValueError("Ciphertext is empty...")
    padding = data[-1]  # pick the padding value from the end; Python 2.x: ord(data[-1])
    # A padding length outside 1..block_size can never come from the padding
    # scheme used on encryption, so reject it even if the trailing bytes match.
    if not 1 <= padding <= AES.block_size:
        raise ValueError("Invalid padding...")
    if data[-padding:] != bytes([padding]) * padding:  # Python 2.x: chr(padding) * padding
        raise ValueError("Invalid padding...")
    return data[:-padding]  # remove the padding

It's set up to work with bytes data, so if you want to encrypt strings or use string passwords, make sure you encode() them with a proper codec before passing them to the functions. If you leave the encode parameter set to True, the encrypt() output will be a base64-encoded string, and the decrypt() source should also be a base64 string.

Now if you test it as:

# Demo: encrypt and decrypt the same payload twice with the same password.
my_password = b"secret_AES_key_string_to_encrypt/decrypt_with"
my_data = b"input_string_to_encrypt/decrypt"

print("key:  {}".format(my_password))
print("data: {}".format(my_data))

# The first round has no banner; the second is announced before it runs.
for round_banner in (None, "\nSecond round...."):
    if round_banner is not None:
        print(round_banner)
    encrypted = encrypt(my_password, my_data)
    print("\nenc:  {}".format(encrypted))
    decrypted = decrypt(my_password, encrypted)
    print("dec:  {}".format(decrypted))
    print("\ndata match: {}".format(my_data == decrypted))

your output would be similar to:

key:  b'secret_AES_key_string_to_encrypt/decrypt_with'
data: b'input_string_to_encrypt/decrypt'

enc:  7roSO+P/4eYdyhCbZmraVfc305g5P8VhDBOUDGrXmHw8h5ISsS3aPTGfsTSqn9f5
dec:  b'input_string_to_encrypt/decrypt'

data match: True

Second round....

enc:  BQm8FeoPx1H+bztlZJYZH9foI+IKAorCXRsMjbiYQkqLWbGU3NU50OsR+L9Nuqm6
dec:  b'input_string_to_encrypt/decrypt'

data match: True

Proving that same key and same data still produce different ciphertext each time.

Now, this is much better than ECB but... if you're going to use this for communication - don't! This is more to explain how it should be constructed, not really to be used in a production environment, and especially not for communication, as it's missing a crucial ingredient - message authentication. Feel free to play with it, but you should not roll your own crypto; there are well-vetted protocols that will help you avoid the common pitfalls, and you should use those.

Solution 2:

Based on zwer's answer, but shows an example attempt to deal with the case where the source text is exactly a multiple of 16 bytes (AES.block_size). However, @zwer explains in a comment how this code will BREAK THE ENCRYPTION of your text by not padding your source text appropriately, making your pipeline insecure.

Code:

from builtins import bytes
import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random

def encrypt(string, password):
    """
    Encrypt `string` with AES-CBC under a key derived from `password`.

    The return value is the freshly generated IV followed by the
    ciphertext of the padded input, so the password alone is enough
    to decrypt it again.
    """
    aes_key = password_to_key(password)
    iv = make_initialization_vector()
    cipher = AES.new(aes_key, AES.MODE_CBC, iv)

    # The IV travels in front of the ciphertext.
    ciphertext = cipher.encrypt(pad_string(string))
    return iv + ciphertext

def decrypt(string, password):
    """
    Decrypt data laid out as IV followed by AES-CBC ciphertext.

    The key is derived from `password`, the first AES.block_size bytes
    of `string` are used as the IV, and the decrypted remainder is
    passed through unpad_string() before being returned.
    """
    aes_key = password_to_key(password)

    # Split the input into its IV prefix and the ciphertext body.
    block = AES.block_size
    iv, ciphertext = string[:block], string[block:]

    cipher = AES.new(aes_key, AES.MODE_CBC, iv)
    return unpad_string(cipher.decrypt(ciphertext))

def password_to_key(password):
    """
    Derive the AES key from `password`.

    The key is the SHA-256 digest of the password, i.e. 256 bits,
    which is a valid AES key size.
    """
    hasher = SHA256.new(password)
    return hasher.digest()

def make_initialization_vector():
    """
    Produce a fresh random initialization vector (IV) of AES.block_size bytes.

    An IV is a fixed-size input to a cryptographic primitive that usually
    has to be random or pseudorandom. That randomness is what lets an
    encryption scheme reach semantic security: reusing the same key must
    not let an attacker infer relationships between segments of the
    encrypted message.
    """
    random_source = Random.new()
    return random_source.read(AES.block_size)

def pad_string(string, chunk_size=AES.block_size):
    """
    Pad `string` to a multiple of `chunk_size` bytes.

    The peculiarity of this scheme is that the first byte of the result
    stores how many zero bytes were appended after the data.
    """
    assert chunk_size  <= 256, 'We are using one byte to represent padding'
    # The length-prefix byte counts towards the total that must be aligned.
    framed_length = len(string) + 1
    to_pad = (chunk_size - framed_length) % chunk_size
    prefix = bytes([to_pad])
    filler = bytes([0] * to_pad)
    return prefix + string + filler
def unpad_string(string):
    """
    Strip the length-prefix byte and the trailing padding from `string`.

    The first byte holds the number of padding bytes at the end of the
    data; the payload sits between that byte and the padding.
    Raises IndexError when `string` is empty.
    """
    to_pad = string[0]
    payload = string[1:]
    # Slicing with [:-0] would yield an empty result, so a zero padding
    # count must return the payload untouched instead of discarding it.
    return payload[:-to_pad] if to_pad else payload

def encode(string):
    """
    Return the base64 encoding of `string` as text.

    Base64 is commonly used when binary data has to be stored in, or
    transferred over, media designed for textual data, so that the data
    survives transport without modification.
    """
    b64_bytes = base64.b64encode(string)
    return b64_bytes.decode("latin-1")

def decode(string):
    """Turn base64 text, as produced by encode(), back into the raw bytes."""
    b64_bytes = string.encode("latin-1")
    return base64.b64decode(b64_bytes)

                              

                                                                                                                                                       

Tests:

def random_text(length):
    """Return `length` random lowercase ASCII letters as UTF-8 encoded bytes."""
    # randint is not imported anywhere in the visible code, so import it
    # here to keep this helper self-contained.
    from random import randint

    def rand_lower():
        return chr(randint(ord('a'), ord('z')))
    string = ''.join(rand_lower() for _ in range(length))
    return bytes(string, encoding='utf-8')

def test_encoding():
    """encode() changes its input and decode() restores it exactly."""
    original = random_text(100)
    encoded = encode(original)
    # NOTE(review): encode() returns text while the input is bytes, so on
    # Python 3 this inequality holds for any input.
    assert encoded != original
    restored = decode(encoded)
    assert restored == original

def test_padding():
    """Padded length is the next multiple of 16 that fits the data plus one byte."""
    cases = ((14, 16), (15, 16), (16, 32))
    for text_length, padded_length in cases:
        padded = pad_string(random_text(text_length))
        assert len(padded) == padded_length

def test_encryption():
    """Encrypting changes the data, and decrypting with the same password restores it."""
    plaintext = random_text(100)
    password = random_text(20)

    first_ciphertext = encrypt(plaintext, password)
    assert first_ciphertext != plaintext

    # Encrypt again for the round trip, as each call draws a fresh IV.
    second_ciphertext = encrypt(plaintext, password)
    recovered = decrypt(second_ciphertext, password)
    assert recovered == plaintext