// Fetch a page and collect its SEO-relevant tags -- <title>, meta description,
// meta keywords and every H1-H6 heading -- into $headingsArray, then dump the
// result as preformatted HTML.
$htmlString = file_get_contents('https://domen.ru/');

// Every key is always present (possibly with an empty list), so the output has
// the same shape whether or not the page could be fetched and parsed.
$headingsArray = [
    "title" => [],
    "description" => [],
    "keywords" => [],
    "h1" => [],
    "h2" => [],
    "h3" => [],
    "h4" => [],
    "h5" => [],
    "h6" => [],
];

$htmlDom = new DOMDocument;

// file_get_contents() returns false on failure, and loadHTML() rejects an empty
// string, so only parse when there is actual markup to work with.
if ($htmlString !== false && trim($htmlString) !== '') {
    // Real-world markup is rarely valid. Collect libxml's parse errors internally
    // instead of silencing every possible error of the call with "@".
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    // NOTE(review): loadHTML() falls back to ISO-8859-1 when the markup does not
    // declare its charset -- confirm the target page declares one, otherwise
    // non-ASCII text may come out garbled.
    $isParsed = $htmlDom->loadHTML($htmlString);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    if ($isParsed) {
        foreach ($htmlDom->getElementsByTagName('title') as $titleTag) {
            $headingsArray['title'][] = trim($titleTag->nodeValue);
        }

        // Single pass over <meta>: the name is matched case-insensitively so that
        // variants such as name="Description" or name="KEYWORDS" are picked up too.
        foreach ($htmlDom->getElementsByTagName('meta') as $metaTag) {
            $metaName = strtolower($metaTag->getAttribute('name'));
            if ($metaName === 'description' || $metaName === 'keywords') {
                $headingsArray[$metaName][] = $metaTag->getAttribute('content');
            }
        }

        // The heading tag name doubles as the key in $headingsArray.
        foreach (['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] as $headingTagName) {
            foreach ($htmlDom->getElementsByTagName($headingTagName) as $headingTag) {
                $headingsArray[$headingTagName][] = trim($headingTag->nodeValue);
            }
        }
    }
}

// The extracted text comes from a remote page: escape it before embedding it in
// HTML so that stray "<", "&" or injected markup cannot break or hijack the output.
echo "<pre>";
echo htmlspecialchars(print_r($headingsArray, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
echo "</pre>";
Как собрать список всех URL страниц сайта.
// Print every page URL listed in the site's sitemap, one per line.
$xml = simplexml_load_file('https://domen.ru/sitemap.xml');

// simplexml_load_file() returns false when the sitemap cannot be fetched or is
// not well-formed XML; in that case there is nothing to list.
if ($xml !== false) {
    foreach ($xml->url as $val) {
        // <loc> values are XML-decoded by SimpleXML (e.g. "&amp;" becomes "&"),
        // so they must be escaped again before being written into HTML.
        echo htmlspecialchars((string) $val->loc, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '<br>';
    }
}