Resize Excel file sheet columns using Python in Azure
I am unable to resize the columns of an Excel sheet from Python when the workbook is built in memory and uploaded to Azure Blob Storage. What am I doing wrong? The relevant code snippet follows:
def resize_columns(df, data_val, sheet_name):
    """Fit the column widths of one worksheet to the contents of ``df``.

    The workbook is loaded from ``data_val``, the widths of ``sheet_name`` are
    adjusted, and the workbook is then written back into ``data_val`` so that
    the caller actually receives the resized file.

    Args:
        df: DataFrame whose columns correspond, position by position, to the
            columns of the worksheet (assumed to have been written with
            ``index=False``).
        data_val: Seekable binary buffer (such as ``io.BytesIO``) or file path
            that holds the workbook.
        sheet_name: Name of the worksheet to resize.

    Returns:
        ``data_val``, now containing the resized workbook.
    """
    is_buffer = hasattr(data_val, "seek")
    if is_buffer:
        # The writer leaves the cursor at the end of the buffer.
        data_val.seek(0)
    wb = openpyxl.load_workbook(data_val)
    worksheet = wb[sheet_name]

    # zip() stops at the shorter of the two, so a sheet with more columns than
    # the frame no longer raises IndexError.
    for position, (cells, header) in enumerate(zip(worksheet.columns, df.columns)):
        values = df.iloc[:, position]
        longest_cell = 0 if values.empty else values.astype(str).str.len().max()
        # Widest of header and data, padded by 25 % and capped at 16.
        width = min(max(longest_cell, len(str(header))) * 1.25, 16)
        worksheet.column_dimensions[cells[0].column_letter].width = float(width)

    # Without this save the edits live only in ``wb`` and the bytes in
    # ``data_val`` stay unchanged.
    if is_buffer:
        data_val.seek(0)
        data_val.truncate()
    wb.save(data_val)
    return data_val
connect_str = "..."
blob_client = blob_service_client.get_blob_client(container=new_container_name, blob=file_name)

# Build the workbook in memory. The context manager writes the workbook into
# ``data_val`` on exit; ``ExcelWriter.save()`` was removed in pandas 2.0.
data_val = io.BytesIO()
with pd.ExcelWriter(data_val, engine="openpyxl") as writer:
    mpm_summary = pd.DataFrame({"one_val": [1, 2]})
    mpm_summary.to_excel(writer, sheet_name="A", index=False)
    mpm_summary = pd.DataFrame({"two_val00000000000000000000000000000000000000": [3, 4], "three_val": [5,6]})
    mpm_summary.to_excel(writer, sheet_name="B", index=False)

# Resize sheet "B" using the last frame written, then upload the buffer.
data_val = resize_columns(mpm_summary, data_val, "B")
blob_client.upload_blob(data_val.getvalue(), overwrite=True)
Your help is much appreciated. Thanks
Solution 1:
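Instead of loading the workbook a second time with
wb = openpyxl.load_workbook(data_val)
you can use the workbook the writer already holds (`writer.book`) to access the worksheet, make the changes, and then save the writer before uploading the blob. Changes made to a separately loaded workbook only reach the uploaded bytes if that workbook is itself saved back into the buffer.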
def resize_columns(writer, df, data_val, sheet_name, *, scale=1.25, max_width=16, offset=0.78):
    """Fit the column widths of one worksheet held by an open Excel writer.

    The worksheet is taken from ``writer.book``, so the new widths are written
    out when the writer itself is saved; nothing is reloaded from ``data_val``.

    Args:
        writer: Open ``pd.ExcelWriter`` (openpyxl engine) that already
            contains ``sheet_name``.
        df: DataFrame whose columns correspond, position by position, to the
            columns of the worksheet (assumed to have been written with
            ``index=False``).
        data_val: Buffer the writer targets. It is not read or modified here
            and is returned unchanged.
        sheet_name: Name of the worksheet to resize.
        scale: Multiplier applied to the longest text length in a column.
        max_width: Upper bound applied to the scaled width.
        offset: Constant added after capping, to compensate for the gap
            between the openpyxl width value and the width Excel displays.

    Returns:
        ``data_val``, unchanged.
    """
    worksheet = writer.book[sheet_name]

    # zip() stops at the shorter of the two, so a sheet with more columns than
    # the frame no longer raises IndexError.
    for position, (cells, header) in enumerate(zip(worksheet.columns, df.columns)):
        values = df.iloc[:, position]
        # An empty column has no cell text; size it from the header alone.
        longest_cell = 0 if values.empty else values.astype(str).str.len().max()
        width = min(max(longest_cell, len(str(header))) * scale, max_width)
        worksheet.column_dimensions[cells[0].column_letter].width = float(width) + offset
    return data_val
blob_client = blob_service_client.get_blob_client(container=container_name, blob=file_name)

# Build the workbook in memory. The context manager writes the workbook into
# ``data_val`` on exit; ``ExcelWriter.save()`` was removed in pandas 2.0.
data_val = io.BytesIO()
with pd.ExcelWriter(data_val, engine="openpyxl") as writer:
    mpm_summary = pd.DataFrame({"one_val": [1, 2]})
    mpm_summary.to_excel(writer, sheet_name="A", index=False)
    mpm_summary = pd.DataFrame({"two_val00000000000000000000000000000000000000": [3, 4], "three_val": [5,6]})
    mpm_summary.to_excel(writer, sheet_name="B", index=False)
    # Resize while the writer is still open so the widths are part of what
    # gets written when the block exits.
    data_val = resize_columns(writer,mpm_summary, data_val, "B")

blob_client.upload_blob(data_val.getvalue(), overwrite=True)
Please be aware that the width value set through openpyxl differs slightly from the column width Excel displays, so a small correction is needed; your multiplication factor or offset may require fine-tuning. See https://foss.heptapod.net/openpyxl/openpyxl/-/issues/293
openpyxl - adjust column width size