why python pandas not able to generate xlsx file?
I am trying to achieve below tasks:
traverse_dir() function
- read a root directory, and get the names of the sub directories.
- read the sub directories and see if 'installed-files.json' file is present.
- if the 'installed-files.json' file is present in all the directories, then open them and create a excel file out of the JSON file those are present in all the sub directories.
filter_apk() function
- read the excel file generated in the first function and create another excel file that will store only file names ending with '.apk'.
Below is the code snippet:
def traverse_dir(rootDir, file_name):
dir_names = []
for names in os.listdir(rootDir):
entry_path = os.path.join(names)
if os.path.isdir(entry_path):
dir_names.append(entry_path)
for i in dir_names:
if file_name in i:
with open(file_name) as jf:
data = json.load(jf)
df = pd.DataFrame(data)
new_df = df[df.columns.difference(['SHA256'])]
new_df.to_excel('abc.xlsx')
def filter_apk():
traverse_dir(rootDir, file_name)
old_xl = pd.read_excel('abc.xlsx')
a = old_xl[old_xl["Name"].str.contains("\.apk")]
a.to_excel('zybg.xlsx')
rootDir = '<root path where sub folders resides>'
file_name = 'installed-files.json'
filter_apk()
Note:
-
I have tested the code separately on single folder, and its working like charm. I am only facing issues when I am trying to work with multiple directories.
-
In fact, in the 1st function
traverse_dir()
, I am able to list the sub directories.
I am getting below errors while executing the program.
Traceback (most recent call last):
File "Jenkins.py", line 36, in <module>
filter_apk()
File "Jenkins.py", line 30, in filter_apk
old_xl = pd.read_excel('abc.xlsx')
with open(filename, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: 'abc.xlsx'
Why the file is not getting generated? Any suggestions?
modified code
def traverse_dir(rootDir, file_name):
dir_names = []
for names in os.listdir(rootDir):
entry_path = os.path.join(rootDir, names)
if os.path.isdir(entry_path):
dir_names.append(entry_path)
for fil_name in dir_names:
file_path = os.path.join(entry_path, fil_name, file_name)
print(file_path)
if os.path.isfile(file_path):
with open(file_path) as jf:
data = json.load(jf)
df = pd.DataFrame(data)
df1 = pd.DataFrame(data)
new_df = df[df.columns.difference(['SHA256'])]
new_df1 = df1[df.columns.difference(['SHA256'])]
with pd.ExcelWriter('abc.xlsx') as writer:
new_df.to_excel(writer, sheet_name='BRA', index=False)
new_df1.to_excel(writer, sheet_name='CNA', index=False)
else:
raise FileNotFoundError
rootDir = <path to subdirs
file_name = 'installed-files.json'
traverse_dir(rootDir, file_name)
The main issue is that if file_name in i:
is always false, hence no xlsx file created.
You may need to make some changes to test for the file existence, for example:
import os
def traverse_dir(rootDir, file_name):
dir_names = []
for names in os.listdir(rootDir):
entry_path = os.path.join(names)
if os.path.isdir(entry_path):
dir_names.append(entry_path)
for i in dir_names:
file_path=os.path.join(rootDir,i,file_name)
if os.path.isfile(file_path):
with open(file_path) as jf:
data = json.load(jf)
df = pd.DataFrame(data)
new_df = df[df.columns.difference(['SHA256'])]
new_df.to_excel('abc.xlsx')
def filter_apk():
traverse_dir(rootDir, file_name)
old_xl = pd.read_excel('abc.xlsx')
a = old_xl[old_xl["Name"].str.contains("\.apk")]
a.to_excel('zybg.xlsx')
rootDir = '<root path where sub folders resides>'
file_name = 'installed-files.json'
filter_apk()