Resize Excel file sheet columns using Python in Azure

I am unable to resize the columns of Excel sheets from Python when the workbook is written to an in-memory buffer and uploaded to Azure Blob Storage. Please let me know what I am doing wrong. The relevant code snippet follows:

    def resize_columns(df, data_val, sheet_name):
        """Set the column widths of one sheet from the contents of ``df``.

        df         -- DataFrame whose columns are matched, by position, to the
                      worksheet's columns.
        data_val   -- source passed to ``openpyxl.load_workbook``; returned
                      unchanged.
        sheet_name -- name of the worksheet to resize.

        NOTE: ``wb`` is a separate workbook object loaded from ``data_val``.
        Nothing in this function saves it, so the widths set below are never
        written back into ``data_val``.
        """
        wb =  openpyxl.load_workbook(data_val)
        
        worksheet = wb[sheet_name]
        cols = list(df.columns)
        # Position of the DataFrame column paired with the current sheet column.
        count = 0

        for column in worksheet.columns:
            # Longest of (stringified cell values, header text), scaled by 1.25.
            length = max(df[cols[count]].astype(str).str.len().max(), len(cols[count]))*1.25
            # Cap the width at 16.
            length = length if length <= 16 else 16
            worksheet.column_dimensions[column[0].column_letter].width = length
            count += 1

        # The buffer that was passed in is handed back as-is.
        return data_val
        

    # Connection string placeholder; not used in the lines shown here
    # (presumably blob_service_client is built from it elsewhere -- confirm).
    connect_str = "..."

    blob_client = blob_service_client.get_blob_client(container=new_container_name, blob=file_name)

    # In-memory buffer that receives the Excel workbook.
    data_val = io.BytesIO()

    writer = pd.ExcelWriter(data_val, engine="openpyxl")
    # Sheet "A": a single column.
    mpm_summary = pd.DataFrame({"one_val": [1, 2]})
    mpm_summary.to_excel(writer, sheet_name="A", index=False)

    # Sheet "B": one long header and one short header.
    mpm_summary = pd.DataFrame({"two_val00000000000000000000000000000000000000": [3, 4], "three_val": [5,6]})
    mpm_summary.to_excel(writer, sheet_name="B", index=False)

    # The writer is saved here, before resize_columns is called.
    writer.save()

    # resize_columns returns the same data_val object it was given.
    data_val = resize_columns(mpm_summary, data_val, "B")

    # Upload the buffer's bytes, replacing any existing blob.
    blob_client.upload_blob(data_val.getvalue(), overwrite=True)

Your help is much appreciated. Thanks


Solution 1:

Instead of loading the workbook again with

wb = openpyxl.load_workbook(data_val)

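you can use the writer object itself (through `writer.book`) to access the worksheet, make the changes, and then save the writer before uploading the blob. In the original code the reloaded workbook is a separate copy that is never saved back into the buffer, so its width changes are lost.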


def resize_columns(writer, df, data_val, sheet_name, max_width=16, scale=1.25, padding=0.78):
    """Size the columns of ``sheet_name`` to fit the contents of ``df``.

    The worksheet is taken from ``writer.book`` so that the widths are part of
    the workbook the writer saves later; call this *before* the writer is
    saved/closed.

    Args:
        writer: Object exposing the workbook as ``writer.book`` (for example a
            ``pandas.ExcelWriter`` using the openpyxl engine).
        df: DataFrame that was written to the sheet. Its columns are paired
            with the worksheet columns by position.
        data_val: Output buffer. Not used here; returned unchanged so existing
            ``data_val = resize_columns(...)`` call sites keep working.
        sheet_name: Name of the worksheet to resize.
        max_width: Upper bound applied to the scaled width (before padding).
        scale: Factor applied to the longest text length in a column.
        padding: Constant added to every width to compensate for the offset
            between the width that is set and the width Excel displays.

    Returns:
        ``data_val``, unchanged.
    """
    worksheet = writer.book[sheet_name]

    # Pair sheet columns with DataFrame columns by position. zip() stops at the
    # shorter of the two, so a sheet with extra columns no longer raises
    # IndexError; the extra columns are simply left untouched.
    pairs = zip(worksheet.columns, df.columns)
    for position, (column, header) in enumerate(pairs):
        # Positional access keeps this working with duplicate column labels.
        lengths = df.iloc[:, position].astype(str).str.len().dropna()
        # An empty column has no cell text; fall back to the header alone.
        longest_value = int(lengths.max()) if len(lengths) else 0
        # str() lets non-string headers (e.g. integer labels) be measured too.
        longest = max(longest_value, len(str(header)))

        width = min(longest * scale, max_width)
        letter = column[0].column_letter
        worksheet.column_dimensions[letter].width = width + padding

    return data_val

blob_client = blob_service_client.get_blob_client(container=container_name, blob=file_name)

# In-memory buffer that receives the finished workbook.
data_val = io.BytesIO()

# The context manager saves the workbook into data_val when the block exits.
# It replaces writer.save(), which was deprecated in pandas 1.5 and removed in
# pandas 2.0, and works on older pandas versions as well.
with pd.ExcelWriter(data_val, engine="openpyxl") as writer:
    mpm_summary = pd.DataFrame({"one_val": [1, 2]})
    mpm_summary.to_excel(writer, sheet_name="A", index=False)

    mpm_summary = pd.DataFrame(
        {
            "two_val00000000000000000000000000000000000000": [3, 4],
            "three_val": [5, 6],
        }
    )
    mpm_summary.to_excel(writer, sheet_name="B", index=False)

    # Resize while the writer is still open so the widths are part of the
    # workbook that gets saved on exit.
    data_val = resize_columns(writer, mpm_summary, data_val, "B")

# Upload the buffer's bytes, replacing any existing blob.
blob_client.upload_blob(data_val.getvalue(), overwrite=True)

Please be aware that the width you set through openpyxl differs slightly from the column width Excel displays, so a correction factor is needed. Your multiplication factor or offset may therefore require fine-tuning. See https://foss.heptapod.net/openpyxl/openpyxl/-/issues/293

openpyxl - adjust column width size