# Email address parser and filter.
# Takes an address list from the "list" folder next to this script and sorts
# every address either into the cleaned result file or into one of the
# exclude_*.log files (own addresses, skip patterns, blacklist, over-long
# addresses, excluded domains, domains without a usable MX record).
#
# Files read from the script folder (one entry per line):
#   whitelist       exact addresses that are always kept
#   whiteskip       regex patterns; a match keeps the address
#   selflist        regex patterns; a match goes to exclude_self.log
#   skip            regex patterns; a match goes to exclude_skip.log
#   addr-exclude    exact addresses that go to exclude_bl.log
#   domain-exclude  "@domain" entries that go to exclude_domains.log
#
# Results are written to list\<MD5 of the input file>\, so a list whose
# result folder already exists is not processed again.

$file = Read-Host -Prompt 'Enter list name'
$spth = Split-Path -Parent $MyInvocation.MyCommand.Definition
$mpth = "$spth\list"

if (-not (Test-Path -LiteralPath $mpth)) {
    Write-Output "Result folder and address list didnt exist"
    # 'return' ends this script only; a bare 'break' outside of a loop could
    # also unwind a loop in whatever called the script.
    return
}

$flst = "$mpth\$file"
if (-not (Test-Path -LiteralPath $flst -PathType Leaf)) {
    Write-Output "Address list not found: $flst"
    return
}

# Hash the list from a stream instead of loading it into memory as a whole.
# The dash-separated BitConverter format is kept on purpose: it is the name
# of the result folder, and existing folders must still be recognised.
$md5 = New-Object -TypeName System.Security.Cryptography.MD5CryptoServiceProvider
try {
    $stream = [System.IO.File]::OpenRead($flst)
    try {
        $hash = [System.BitConverter]::ToString($md5.ComputeHash($stream))
    }
    finally {
        $stream.Dispose()
    }
}
finally {
    $md5.Dispose()
}

$path = "$mpth\$hash"

if (Test-Path -LiteralPath $path) {
    Write-Output "List already processed, results are in: $path"
}
else {
    $null = New-Item -ItemType Directory -Path $path

    # Reads a list file and returns its non-blank lines.
    # A blank line must never reach the regex lists: an empty pattern matches
    # every address and would route the whole input into a single file.
    Function Read-ListFile {
        Param ( [string]$ListPath )
        if (-not (Test-Path -LiteralPath $ListPath -PathType Leaf)) {
            Write-Warning "List file not found, treated as empty: $ListPath"
            return
        }
        Get-Content -LiteralPath $ListPath | Where-Object { $_ -and $_.Trim() }
    }

    # Returns $true when $Value matches at least one regex in $Patterns.
    Function Test-AnyPattern {
        Param ( [string]$Value, $Patterns )
        ForEach ($pattern in $Patterns) {
            if ($Value -match $pattern) { return $true }
        }
        return $false
    }

    # Returns $true only when the DNS answer for $Domain contains a real MX
    # record. For a domain that exists but has no MX, Resolve-DnsName still
    # returns an object (the SOA record), so the bare result must not be used
    # as the validity test. Any lookup failure counts as "no MX".
    # NOTE(review): a transient DNS failure also marks the domain invalid for
    # the rest of the run - confirm this is acceptable.
    Function Test-MxRecord {
        Param ( [string]$Domain )
        if (-not $Domain) { return $false }
        try {
            $records = @(Resolve-DnsName -Name $Domain -Type MX -ErrorAction Stop |
                Where-Object { $_.NameExchange })
        }
        catch {
            return $false
        }
        return ($records.Count -gt 0)
    }

    # Appends one address to a file inside the result folder.
    Function Write-AddressLog {
        Param ( [string]$Address, [string]$LogName )
        $Address | Out-File -LiteralPath "$path\$LogName" -Encoding "UTF8" -Append
    }

    $ListSkip    = @(Read-ListFile "$spth\skip")
    $WhiteListRE = @(Read-ListFile "$spth\whiteskip")
    $ListDomains = @(Read-ListFile "$spth\domain-exclude")
    $WhiteList   = @(Read-ListFile "$spth\whitelist")
    $BlackList   = @(Read-ListFile "$spth\addr-exclude")
    $SelfList    = @(Read-ListFile "$spth\selflist")

    # Domain caches, pre-seeded with well-known good and bad domains. They
    # grow while the list is processed so each domain is resolved only once.
    $global:ListValidDomains = @()
    $global:ListValidDomains += "@mail.ru", "@bk.ru", "@list.ru", "@yandex.ru", "@ya.ru"
    $global:ListInvalidDomains = @()
    $global:ListInvalidDomains += "@domain.ru", "@domain.com", "@test.ru", "@example.ru", "@example.com", "@test.com", "@mail.gmail.com"

    # Classifies every address in $array and appends it to the matching file
    # in the result folder. The checks run in a fixed order and the first one
    # that applies decides the destination:
    #   1. exact whitelist / whitelist regex -> result file
    #   2. selflist regex                    -> exclude_self.log
    #   3. skip regex                        -> exclude_skip.log
    #   4. exact blacklist                   -> exclude_bl.log
    #   5. longer than 40 characters         -> exclude_length.log
    #   6. excluded domain                   -> exclude_domains.log
    #   7. domain known or resolved as valid -> result file
    #   8. anything else                     -> exclude_badmx.log
    # Parameters:
    #   array - a batch of address strings (one element per input line).
    Function Clean-List {
        Param (
            [parameter(Position=0, Mandatory=$true, ValueFromPipeline=$false)]
            $array
        )

        ForEach ($raw in $array) {
            # Blank lines would otherwise turn into a DNS query for "".
            $addr = ([string]$raw).Trim()
            if (-not $addr) { Continue }

            if ($WhiteList -contains $addr) {
                Write-AddressLog $addr $file
                Continue
            }
            if (Test-AnyPattern $addr $WhiteListRE) {
                Write-AddressLog $addr $file
                Continue
            }
            if (Test-AnyPattern $addr $SelfList) {
                Write-AddressLog $addr "exclude_self.log"
                Continue
            }
            if (Test-AnyPattern $addr $ListSkip) {
                Write-AddressLog $addr "exclude_skip.log"
                Continue
            }
            if ($BlackList -contains $addr) {
                Write-AddressLog $addr "exclude_bl.log"
                Continue
            }
            if ($addr.Length -gt 40) {
                Write-AddressLog $addr "exclude_length.log"
                Continue
            }

            # Everything after the last "@", with the "@" put back so it can
            # be compared against the "@domain" entries of the lists.
            $addr_domn = "@" + ($addr -split "@")[-1]

            if ($ListDomains -contains $addr_domn) {
                Write-AddressLog $addr "exclude_domains.log"
                Continue
            }
            if ($global:ListValidDomains -contains $addr_domn) {
                Write-AddressLog $addr $file
                Continue
            }
            if ($global:ListInvalidDomains -contains $addr_domn) {
                Write-AddressLog $addr "exclude_badmx.log"
                Continue
            }

            # Unknown domain: resolve once and remember the verdict.
            $domain_dns = $addr_domn -replace "@", ""
            if (Test-MxRecord $domain_dns) {
                $global:ListValidDomains += $addr_domn
                Write-AddressLog $addr $file
            }
            else {
                $global:ListInvalidDomains += $addr_domn
                Write-AddressLog $addr "exclude_badmx.log"
            }
        }
    }

    # Feed the list in batches of 1000 lines to limit pipeline overhead.
    Get-Content -LiteralPath $flst -ReadCount 1000 | ForEach-Object { Clean-List $_ }
}