利用者:Kirche/sandbox/EngadgetJPとTCJのアーカイブ
表示
< 利用者:Kirche | sandbox
「TECHCRUNCH JAPAN」および「エンガジェット日本版」終了のお知らせ
両サイトはGDPRを理由に海外からの閲覧を拒否し、本家サイトへリダイレクトしている。そのためWayback Machineではスナップショットを取得できない。Archive todayでは取得できる模様。
Count: 1604
# Collect every external link recorded by the wiki's list=exturlusage API for
# the two domains below, normalise each one to scheme://host/path (query string
# and fragment are dropped), and leave the sorted, de-duplicated list in $urls.
$domains = @("japanese.engadget.com", "jp.techcrunch.com")
$protocols = @('http', 'https')

# Emitting strings from the loop and capturing them once avoids re-allocating
# the array on every append.
$urls = @(
    foreach ($domain in $domains) {
        foreach ($protocol in $protocols) {
            $cursor = $null
            do {
                $uri = "https://ja-two.iwiki.icu/w/api.php?action=query&list=exturlusage&format=json&eulimit=500&euprotocol=$protocol&euquery=$domain"
                if (-not [string]::IsNullOrEmpty($cursor)) {
                    # The continuation token is opaque; escape it so characters such as
                    # '&', '#' or '|' cannot corrupt the query string.
                    $uri += "&eucontinue=$([uri]::EscapeDataString($cursor))"
                }

                # Reset first: if the request still fails after all retries, the assignment
                # below is skipped and we must not re-process the previous page.
                $result = $null
                $result = Invoke-RestMethod $uri -RetryIntervalSec 5 -MaximumRetryCount 10
                Write-Host $result.query.exturlusage.count

                foreach ($entry in $result.query.exturlusage) {
                    $parsed = $null
                    if (-not [uri]::TryCreate([string]$entry.url, [System.UriKind]::Absolute, [ref]$parsed)) {
                        # Skip entries that are not absolute URLs instead of re-emitting the
                        # previously parsed one.
                        Write-Warning "Skipping unparsable URL: $($entry.url)"
                        continue
                    }
                    "$($parsed.Scheme)://$($parsed.Host)$($parsed.LocalPath)"
                }

                # Null when there are no more pages (or when the request failed), which
                # ends the paging loop for this domain/protocol pair.
                $cursor = $result.continue.eucontinue
            } while (-not [string]::IsNullOrEmpty($cursor))
        }
    }
)

# Report the raw count and the count after de-duplication.
Write-Host $urls.Count -NoNewline
Write-Host " -> " -NoNewline
# @() keeps $urls an array even when zero or one URL survives.
$urls = @($urls | Sort-Object -Unique)
Write-Host $urls.Count
# Build a sortable wikitable with one row per URL in $urls: the link itself, a
# link to the wiki's external-link search for it, and the closest Wayback Machine
# snapshot when one is available with status 200. URLs without a usable snapshot
# are collected in $issue. The finished markup is written to ~/table.txt.
$okStatus = ([int][System.Net.HttpStatusCode]::OK).ToString()   # "200"; hoisted out of the loop

# Rows are gathered in a list and joined once, instead of growing one string
# on every iteration.
$rows = [System.Collections.Generic.List[string]]::new()
$rows.Add("{| class=`"wikitable sortable`"`n!URL!!リンク検索!!Archive")
$missing = [System.Collections.Generic.List[string]]::new()

foreach ($url in $urls) {
    Write-Progress -Activity $url
    $row = "|-`n|[$url $url]||[[特別:外部リンク検索/$url|L]]||"

    # Reset first: if the request still fails after all retries, the assignment
    # below is skipped and this row must not inherit the previous URL's snapshot.
    $result = $null
    # The target URL is a query-string value, so it is percent-encoded.
    $result = Invoke-RestMethod "https://archive.org/wayback/available?url=$([uri]::EscapeDataString($url))" -RetryIntervalSec 5 -MaximumRetryCount 10

    $closest = $result.archived_snapshots.closest
    if ($closest.available -eq $true -and $closest.status -eq $okStatus) {
        $row += "[$($closest.url)]"
    } else {
        # No snapshot, a non-200 snapshot, or a failed lookup.
        $missing.Add($url)
    }
    $rows.Add($row)
}

$rows.Add('|}')
$issue = @($missing)
$table = $rows -join "`n"
Out-File -FilePath "~/table.txt" -InputObject $table