Dynamically scrape paginated table with BeautifulSoup and store results in csv?

I've made some changes to your script that should make it easier to debug and maintain. It uses pandas to simplify writing to CSV and concurrent.futures to speed things up. Each year is scraped concurrently: the first page is fetched to find out how many pages there are, then each page is requested and its HTML parsed. The key pieces of information from each row go into a dictionary that is appended to a list, which is essentially a DataFrame already (a list of dicts), so pandas can write it straight to CSV. If you have questions, let me know.

import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import re
import concurrent.futures

def scrape_year(year):

    headers = {
        'accept': '*/*',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        }

    url = f'https://aviation-safety.net/wikibase/dblist.php?Year={year}&sorteer=datekey&page=1'
    req = requests.get(url, headers=headers)

    soup = BeautifulSoup(req.text, 'html.parser')

    # The pagination block on the first page tells us how many pages this year has;
    # fall back to a single page if no pagination block is present.
    page_container = soup.find('div', {'class': 'pagenumbers'})
    if page_container:
        pages = max(int(a['href'].split('=')[-1]) for a in page_container.find_all('a'))
    else:
        pages = 1

    info = []
    for page in range(1,pages+1):

        new_url = f'https://aviation-safety.net/wikibase/dblist.php?Year={year}&sorteer=datekey&page={page}'
        print(new_url)

        data = requests.get(new_url, headers=headers)
        soup = BeautifulSoup(data.text, 'html.parser')

        # The accident list is the table with class 'hp'
        table = soup.find('table', {'class': 'hp'})

        # Data rows have a class that starts with 'list'
        regex = re.compile('list.*')
        for index, row in enumerate(table.find_all('tr', {'class': regex})):
            if index == 0:
                continue

            acc_link = 'https://aviation-safety.net/' + row.find('a')['href']

            # Dates are not always complete; pad a missing day (or day and month)
            # with 01 so the row is still captured, otherwise skip the row.
            raw_date = row.find('a').text.strip()
            try:
                acc_date = datetime.strptime(raw_date, '%d-%b-%Y').strftime('%Y-%m-%d')
            except ValueError:
                try:
                    acc_date = datetime.strptime('01' + raw_date, '%d-%b-%Y').strftime('%Y-%m-%d')
                except ValueError:
                    try:
                        acc_date = datetime.strptime('01-01' + raw_date, '%d-%m-%Y').strftime('%Y-%m-%d')
                    except ValueError:
                        continue

            # Grab the cells once instead of calling find_all for every column
            cells = row.find_all('td')
            acc_type = cells[1].text
            acc_reg = cells[2].text
            acc_operator = cells[3].text
            acc_fat = cells[4].text
            acc_location = cells[5].text
            acc_dmg = cells[7].text

            item = {
                'acc_link': acc_link,
                'acc_date': acc_date,
                'acc_type': acc_type,
                'acc_reg': acc_reg,
                'acc_operator': acc_operator,
                'acc_fat': acc_fat,
                'acc_location': acc_location,
                'acc_dmg': acc_dmg
                }

            info.append(item)

    # A list of dicts converts straight into a DataFrame, one column per key
    df = pd.DataFrame(info)
    df.to_csv(f'{year}_aviation-safety.csv', index=False)


if __name__ == "__main__":

    START = 1916
    STOP = 2022

    years = list(range(START, STOP + 1))

    print(f'Scraping {len(years)} years of data')

    with concurrent.futures.ThreadPoolExecutor(max_workers=60) as executor:
        final_list = executor.map(scrape_year, years)
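executor.map returns whatever scrape_year returns, which here is nothing, since each year writes its own file. If you would rather end up with one combined CSV, here is a minimal sketch, assuming scrape_year is changed to return info instead of writing a per-year file (the output filename is just illustrative):

# Sketch only: assumes scrape_year() ends with "return info"
# instead of writing one CSV per year.
with concurrent.futures.ThreadPoolExecutor(max_workers=60) as executor:
    results = executor.map(scrape_year, years)

# Flatten the per-year lists of dicts into one list and write a single file
all_rows = [row for year_rows in results for row in year_rows]
pd.DataFrame(all_rows).to_csv('aviation-safety_all_years.csv', index=False)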

What happens?

First of all, always look into the soup - therein lies the truth.

You are missing the headers in the request inside the while loop; that causes a 403 error, and then the table is not selected correctly.

How to fix it?

Set the headers for the request inside the while loop correctly:

html = requests.get(url , headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'})

Select your rows more specifically - note there is no tbody in the HTML:

        # Go through the table rows with class 'list' and extract the data from the 'td' tags
        for row in soup.select('table tr.list'):

Also check the selectors for the pagination:

# If more than one page then iterate through all of them        
if soup.select_one('div.pagenumbers span.current + a'):
    url = 'https://aviation-safety.net/wikibase/dblist.php'+soup.select_one('div.pagenumbers span.current + a')['href']
else:
    break
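
The span.current + a part is an adjacent-sibling selector: it picks the link that comes directly after the highlighted current page number, i.e. the next page, and on the last page nothing matches so the loop can stop. A small, self-contained sketch of how it behaves (pagination markup simplified and assumed, not copied from the site):

from bs4 import BeautifulSoup

# Simplified, assumed pagination markup - just enough to show the selector
html = '''
<div class="pagenumbers">
  <a href="?Year=1936&sorteer=datekey&page=1">1</a>
  <span class="current">2</span>
  <a href="?Year=1936&sorteer=datekey&page=3">3</a>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')

# 'span.current + a' selects the <a> immediately after the current page marker,
# i.e. the next page; on the last page select_one() returns None.
next_page = soup.select_one('div.pagenumbers span.current + a')
print(next_page['href'])   # ?Year=1936&sorteer=datekey&page=3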

Example

import requests, csv
from bs4 import BeautifulSoup

url = 'https://aviation-safety.net/wikibase/dblist.php?Year=1916&sorteer=datekey&page=1'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'}

# newline='' keeps csv.writer from inserting blank lines on Windows
with open('1916_aviation-safety.csv', "w", encoding="utf-8", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["acc. date", "Type", "Registration","operator", "fat", "Location", " ", "dmg", " ", " "])

    while True:
        print(url)
        html = requests.get(url , headers = headers)
        soup = BeautifulSoup(html.text, 'html.parser')

        # Go through the table rows with class 'list' and write each row's 'td' cells
        for row in soup.select('table tr.list'):
            writer.writerow([c.text if c.text else '' for c in row.select('td')])
            print(row)

        # If more than one page then iterate through all of them        
        if soup.select_one('div.pagenumbers span.current + a'):
            url = 'https://aviation-safety.net/wikibase/dblist.php'+soup.select_one('div.pagenumbers span.current + a')['href']
        else:
            break

Just in case

Alternative solution with pandas.read_html() that iterates over all years:

import requests, time, random
import pandas as pd
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'}
url = 'https://aviation-safety.net/wikibase/'
req = requests.get(url , headers = headers)
soup = BeautifulSoup(req.text, 'html.parser')


data = []

for url in ['https://aviation-safety.net/'+a['href'] for a in soup.select('a[href*="/wikibase/dblist.php"]')]:
    while True:

        html = requests.get(url, headers = headers)
        soup = BeautifulSoup(html.text, 'html.parser')

        # read_html returns a list of DataFrames; take the first table on the page
        data.append(pd.read_html(soup.prettify())[0])

        # If more than one page then iterate through all of them        
        if soup.select_one('div.pagenumbers span.current + a'):
            url = 'https://aviation-safety.net/wikibase/dblist.php'+soup.select_one('div.pagenumbers span.current + a')['href']
        else:
            break
        # Brief random pause before fetching the next page
        time.sleep(random.random())

df = pd.concat(data)
df.loc[:, ~df.columns.str.contains('^Unnamed')].to_csv('aviation-safety.csv', index=False)