0
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
// Locate the avatar <img> element: try each alternative XPath location in
// order and keep the result of the first query that yields at least one node.
foreach ([
    "//div[@class='identity']/img",
    "//h1[@class='avatared']/a/img",
    "//div[@class='avatared']/a/img",
    "//div[@itemtype='http://schema.org/Person']/a/img",
] as $avatarQuery) {
    $entries = $xpath->query($avatarQuery);
    if (isset($entries[0])) {
        break;
    }
}
// None of the locations produced an image: move on to the next iteration
// of the enclosing loop.
if (!isset($entries[0])) {
    continue;
}
// The hash is a run of 32 lowercase hex digits in the image URL, preceded by
// "/" or "=" and followed by "?" or "&"; URLs without one are skipped.
$src = $entries[0]->getAttribute('src');
if (preg_match('#[/=]([0-9a-f]{32})[\?&]#', $src, $matches) !== 1) {
    continue;
}
$hash = $matches[1];
// a few lines later
// E-mail extraction, variant 1: the address is hidden inside an inline
// <script> that calls eval(decodeURIComponent('...')). Try both known
// locations of that script and keep the first non-empty result.
foreach ([
    "//div[@class='email']/script",
    "//dl/dd[@class='email']/script",
] as $scriptQuery) {
    $entries = $xpath->query($scriptQuery);
    if (isset($entries[0])) {
        break;
    }
}

if (isset($entries[0])) {
    // Pull the percent-encoded payload out of the script text; if the script
    // does not have the expected shape, skip this iteration of the enclosing loop.
    $rawcode = $entries[0]->textContent;
    if (preg_match("#eval\(decodeURIComponent\('(.*)'\)\)#", $rawcode, $matches) !== 1) {
        continue;
    }
    // The decoded payload contains a mailto: link whose quotes may be
    // backslash-escaped; capture the address between them.
    $rawcode2 = urldecode($matches[1]);
    if (preg_match('#href=\\\\?"mailto:([^"\\\\]*)\\\\?"#', $rawcode2, $matches) !== 1) {
        continue;
    }
    $email = $matches[1];
    unset($entries);
} else {
    // Variant 2: the address is stored URL-encoded in a data-email attribute
    // of a link. Again try both known locations in order.
    foreach ([
        "//div[@class='avatared']/div[@class='details']/dl/dd/a[@data-email]",
        "//ul[@class='vcard-details']/li[@class='vcard-detail']/a[@data-email]",
    ] as $linkQuery) {
        $entries = $xpath->query($linkQuery);
        if (isset($entries[0])) {
            break;
        }
    }
    // NOTE(review): when no variant matches, $email is not assigned here —
    // confirm it is reset per iteration elsewhere.
    if (isset($entries[0])) {
        $email = urldecode($entries[0]->getAttribute('data-email'));
    }
}
Прототип программы, вытягивающей хэш аватарки и е-мейл из архивной копии профиля в «Гитхабе».
Запостил:
Nyancat ,
21.07.2021 (Updated 21.07.2021 )
Nyancat # 0
#гитхаб #граватар #вебархив #xpath #data-mining #bigdata #хуй-ня
LinuxGovno # 0
rotoeb # 0 ⇈
MAKAKA # 0 ⇈
LinuxGovno # 0