Remove duplicate email addresses based on domain (only the first address seen for each domain is kept)
In PHP:
<?php
/**
 * Keep only the first email address seen for each domain.
 *
 * Each entry is trimmed first, so trailing line endings ("\n" or "\r\n")
 * left by file() never end up in the compared domain or in the result.
 * Blank entries are skipped. Domains are compared case-insensitively.
 * An entry without "@" is treated as its own domain, which matches what
 * the original regex-based extraction did for such lines.
 *
 * @param string[] $emails Raw email lines, in priority order.
 *
 * @return string[] Trimmed addresses, one per distinct domain, in input order.
 */
function dedupeEmailsByDomain(array $emails): array
{
    $seenDomains = []; // domain => true; keyed lookup instead of a linear in_array() scan
    $unique = [];

    foreach ($emails as $email) {
        $email = trim((string) $email);
        if ($email === '') {
            continue;
        }

        // Everything after the LAST "@" is the domain (same as the greedy /^.*@/ strip).
        $atPos = strrpos($email, '@');
        $domain = strtolower($atPos === false ? $email : substr($email, $atPos + 1));

        if (isset($seenDomains[$domain])) {
            continue;
        }

        $seenDomains[$domain] = true;
        $unique[] = $email;
    }

    return $unique;
}

$inputPath = '/path/to/email-list.txt';  // raw list, one address per line
$outputPath = '/tmp/clean-emails.txt';   // de-duplicated list is written here

$cleanList = []; // "clean" email list

// Load the raw list without line endings; false when the file cannot be read.
$list = is_readable($inputPath)
    ? file($inputPath, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;

if ($list === false) {
    // Do not overwrite the output file with an empty list when the input is unavailable.
    error_log("Unable to read email list: {$inputPath}");
} else {
    $cleanList = dedupeEmailsByDomain($list);

    // Write the clean list out to a file, one address per line.
    if (file_put_contents($outputPath, implode("\n", $cleanList)) === false) {
        error_log("Unable to write clean email list: {$outputPath}");
    }
}