Alle-Links-einer-Seite-in-array-schreiben.php
Quell Code
<?php
/*
$file = "https://php.de";
$doc = new DOMDocument();
$doc->loadHTMLFile($file);
$tag_name = 'a';
$elements = $doc->getElementsByTagName($tag_name);
echo "<pre>";
if (!is_null($elements)) {
foreach ($elements as $element) {
echo "<br/>". $element->nodeName. ": ";
$nodes = $element->childNodes;
foreach ($nodes as $node) {
echo $node->nodeValue;
}
}
}
*/
?>
<?php
/**
 * Collects the href attribute of every <a> element inside the <body> of a page.
 *
 * The result is a zero-based list in document order. An anchor without an
 * href attribute contributes an empty string, so list positions always
 * correspond to anchor positions in the document.
 *
 * @param string $url URL or file path readable by file_get_contents().
 * @return string[] The href values; an empty array when the source cannot be
 *                  read, is empty, cannot be parsed, or has no anchors.
 */
function getUrls( $url ){
    $hrefs = [];

    $content = file_get_contents( $url );
    // DOMDocument::loadHTML() rejects an empty source, so stop early when
    // nothing was fetched.
    if ($content === false || $content === '') {
        return $hrefs;
    }

    // Collect libxml parse diagnostics internally instead of emitting them as
    // PHP warnings (real-world HTML is rarely well-formed), then restore the
    // previous setting so the rest of the script is unaffected.
    $previousSetting = libxml_use_internal_errors(true);
    $document = new DOMDocument();
    $loaded = $document->loadHTML($content);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if ($loaded === false) {
        return $hrefs;
    }

    $path = new DOMXPath($document);
    $anchors = $path->evaluate("/html/body//a");
    foreach ($anchors as $anchor) {
        $hrefs[] = $anchor->getAttribute('href');
    }

    return $hrefs;
}
// Fetch every link target of the homepage. The @ operator suppresses PHP
// warnings raised while the call runs (deliberate best effort); it does not
// suppress exceptions.
@$link=getUrls('https://sebastian1012.bplaced.net/homepage-neu/');
echo "<pre>";
// The hrefs originate from a remote page, so escape them before embedding
// the dump in HTML output.
echo htmlspecialchars(print_r($link, true), ENT_QUOTES, 'UTF-8');
// Close the <pre> element opened above.
echo "</pre>";
/*
@$doc = new DOMDocument();
@$doc->loadHTMLFile($link[33]);
@$tag_name = 'main';
@$tag2='article';
@$tag3='div';
$elements = $doc->getElementById($tag_name);
$elements = $doc->getElementsByTagName($tag2)[0];
$brennen=$elements->ownerDocument->saveHTML($elements);
// $html = file_get_contents($link[33]);
// foreach($html->find('h1') as $element){
// echo $element->innertext . '<br>';
// }
//echo $brennen;
$dom = new DOMDocument();
libxml_use_internal_errors(true);
$dom->loadHTML($brennen);
libxml_clear_errors();
$h1List = $dom->getElementsByTagName('h1');
$article = $dom->getElementsByTagName('article');
if($h1List->length == 0){
}else{
foreach($h1List as $h1){
print_r($h1->nodeValue);// h1 auslesen
}
}
*/
?>