why python pandas not able to generate xlsx file?

I am trying to achieve below tasks:

traverse_dir() function

- read a root directory, and get the names of the sub directories.
- read the sub directories and see if 'installed-files.json' file is present.
- if the 'installed-files.json' file is present in all the directories, then open them and create a excel file out of the JSON file those are present in all the sub directories.

filter_apk() function

- read the excel file generated in the first function and create another excel file that will store only file names ending with '.apk'.

Below is the code snippet:

def traverse_dir(rootDir, file_name):
    dir_names = []
    for names in os.listdir(rootDir):
        entry_path = os.path.join(names)
        if os.path.isdir(entry_path):
            dir_names.append(entry_path)
            for i in dir_names:
                if file_name in i:
                    with open(file_name) as jf:
                        data = json.load(jf)
                        df = pd.DataFrame(data)
                        new_df = df[df.columns.difference(['SHA256'])]
                        new_df.to_excel('abc.xlsx')

def filter_apk():
    traverse_dir(rootDir, file_name)
    old_xl = pd.read_excel('abc.xlsx')
    a = old_xl[old_xl["Name"].str.contains("\.apk")]
    a.to_excel('zybg.xlsx')

rootDir = '<root path where sub folders resides>'
file_name = 'installed-files.json'
filter_apk()

Note:

I have tested the code separately on single folder, and its working like charm. I am only facing issues when I am trying to work with multiple directories.
In fact, in the 1st function traverse_dir(), I am able to list the sub directories.

I am getting below errors while executing the program.

Traceback (most recent call last):
  File "Jenkins.py", line 36, in <module>
    filter_apk()
  File "Jenkins.py", line 30, in filter_apk
    old_xl = pd.read_excel('abc.xlsx')

    with open(filename, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: 'abc.xlsx'

Why the file is not getting generated? Any suggestions?

modified code

def traverse_dir(rootDir, file_name):
    dir_names = []
    for names in os.listdir(rootDir):
        entry_path = os.path.join(rootDir, names)
        if os.path.isdir(entry_path):
            dir_names.append(entry_path)
            for fil_name in dir_names:
                file_path = os.path.join(entry_path, fil_name, file_name)
                print(file_path)
                if os.path.isfile(file_path):
                    with open(file_path) as jf:
                        data = json.load(jf)
                        df = pd.DataFrame(data)
                        df1 = pd.DataFrame(data)
                        new_df = df[df.columns.difference(['SHA256'])]
                        new_df1 = df1[df.columns.difference(['SHA256'])]
                        with pd.ExcelWriter('abc.xlsx') as writer:
                            new_df.to_excel(writer, sheet_name='BRA', index=False)
                            new_df1.to_excel(writer, sheet_name='CNA', index=False)
                else:
                    raise FileNotFoundError

rootDir = <path to subdirs
file_name = 'installed-files.json'
traverse_dir(rootDir, file_name)

The main issue is that if file_name in i: is always false, hence no xlsx file created. You may need to make some changes to test for the file existence, for example:

import os
def traverse_dir(rootDir, file_name):
    dir_names = []
    for names in os.listdir(rootDir):
        entry_path = os.path.join(names)
        if os.path.isdir(entry_path):
            dir_names.append(entry_path)
            for i in dir_names:
                file_path=os.path.join(rootDir,i,file_name)
                if os.path.isfile(file_path):
                    with open(file_path) as jf:
                        data = json.load(jf)
                        df = pd.DataFrame(data)
                        new_df = df[df.columns.difference(['SHA256'])]
                        new_df.to_excel('abc.xlsx')

def filter_apk():
    traverse_dir(rootDir, file_name)
    old_xl = pd.read_excel('abc.xlsx')
    a = old_xl[old_xl["Name"].str.contains("\.apk")]
    a.to_excel('zybg.xlsx')

rootDir = '<root path where sub folders resides>'
file_name = 'installed-files.json'
filter_apk()

why python pandas not able to generate xlsx file?

Related

Recent Posts