How do I use the class attribute to pick up the innerHTML of an element at a given URL or in an HTML file? I've been looking all over the internet for a clear explanation.
Where am I going wrong? (I'm not used to "->" and "=>", since I don't know what they represent or do.)
[code]
<?php
//should come back to here
/**
 * Walks every descendant of $element in document order and invokes $callback
 * for each element whose tag name equals $tagName (case-insensitively) and
 * whose class attribute is exactly $class.
 *
 * @param DOMNode  $element  Element or document node to search beneath.
 * @param string   $tagName  Tag name to look for, e.g. 'div'.
 * @param string   $class    Exact value the class attribute must have.
 * @param callable $callback Called with each matching DOMElement.
 *
 * @return false|null false when $element is not an element/document node,
 *                    otherwise nothing.
 */
function walkDOMForTagAndClass($element, $tagName, $class, $callback) {
    // A DOMDocument (as passed by the caller) is a valid search root as well.
    if (
        !($element instanceof DOMNode) ||
        ($element->nodeType !== XML_ELEMENT_NODE && $element->nodeType !== XML_DOCUMENT_NODE)
    ) return false; // invalid element
    // we force case as XML vs. SGML are inconsistent on this
    $tagName = strtoupper($tagName);
    $walk = $element->firstChild;
    while ($walk !== null) {
        if (
            ($walk->nodeType === XML_ELEMENT_NODE) &&
            (strtoupper($walk->nodeName) === $tagName) &&
            ($walk->getAttribute('class') === $class)
        ) $callback($walk);
        // Depth first: descend before moving sideways.
        if ($walk->firstChild !== null) {
            $walk = $walk->firstChild;
            continue;
        }
        // PHP's || yields a boolean, so the JavaScript idiom
        // "firstChild || nextSibling || ..." cannot be used here. Climb until
        // an ancestor has a next sibling or the search root is reached.
        while (
            $walk !== null &&
            !$walk->isSameNode($element) &&
            $walk->nextSibling === null
        ) {
            $walk = $walk->parentNode;
        }
        if ($walk === null || $walk->isSameNode($element)) break;
        $walk = $walk->nextSibling;
    }
}
// Page whose markup is parsed into $doc.
$file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV";
$doc = new DOMDocument();
$doc->loadHTMLFile($file);
walkDOMForTagAndClass(
    $doc,
    'div',
    //'columns tablet-8 small-10 tablet-order-3 small-order-2',
    'nocrumbs',
    function($file) {
        // do whatever it is you want with the matches here.
    }
);
/*$html = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV";
$dom = new DOMDocument();
$dom->loadHTML($html);*/
//Evaluate Anchor tag in HTML
$xpath = new DOMXPath($doc);
$hrefs = $xpath->evaluate("/html/body//a");
foreach ($hrefs as $href) {
    $url = $href->getAttribute('href');
    //remove and set target attribute
    $href->removeAttribute('target');
    $href->setAttribute("target", "_blank");
    $newURL = $url."/newurl";
    //remove and set href attribute
    $href->removeAttribute('href');
    $href->setAttribute("href", $newURL);
}
// save html ($file is reused here to hold the serialized document)
$file = $doc->saveHTML();
echo $file;
?>
<?php
//should come back to here
/**
 * Calls $callback for every $tagName descendant of $element whose class
 * attribute equals $class.
 *
 * @param DOMDocument|DOMElement $element  Node providing getElementsByTagName().
 * @param string                 $tagName  Tag name to collect.
 * @param string                 $class    Value the class attribute is compared with.
 * @param callable               $callback Receives each matching element.
 */
function walkDOMForTagAndClass($element, $tagName, $class, $callback)
{
    foreach ($element->getElementsByTagName($tagName) as $candidate) {
        // Skip anything whose class attribute differs from the requested one.
        if ($candidate->getAttribute("class") != $class) {
            continue;
        }
        $callback($candidate);
    }
}
// Parse the local file named by $file.
$file = "thread59.html";
$doc = new DOMDocument();
$doc->loadHTMLFile($file);
// Print the serialized markup of every <div class="nocrumbs"> in the document.
walkDOMForTagAndClass($doc, 'div', 'nocrumbs', function ($ele) {
    echo $ele->ownerDocument->saveHTML($ele);
});
<i>
</i><?php
//should come back to here
/**
 * Runs $callback on each descendant of $element that has tag name $tagName
 * and a class attribute equal to $class.
 *
 * @param DOMDocument|DOMElement $element  Search root.
 * @param string                 $tagName  Tag name passed to getElementsByTagName().
 * @param string                 $class    Expected class attribute value.
 * @param callable               $callback Invoked once per matching element.
 */
function walkDOMForTagAndClass($element, $tagName, $class, $callback)
{
    $nodesWithTag = $element->getElementsByTagName($tagName);
    foreach ($nodesWithTag as $node) {
        $hasWantedClass = ($node->getAttribute("class") == $class);
        if ($hasWantedClass) {
            $callback($node);
        }
    }
}
// Load the remote lexicon page (the local test copy is noted in the trailing comment).
$file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV"; //thread59.html
$doc = new DOMDocument();
$doc->loadHTMLFile($file);
// Print the serialized markup of every <div class="nocrumbs"> in the document.
walkDOMForTagAndClass($doc, 'div', 'nocrumbs', function ($ele) {
    echo $ele->ownerDocument->saveHTML($ele);
});
?>
// Build the lexicon URL from $let and $s, which are defined outside this
// snippet (presumably the Strong's prefix letter and number — confirm with caller).
$file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$let.$s."&t=KJV";
//$file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV"; //thread59.html
$doc = new DOMDocument();
$doc->loadHTMLFile($file_link);
// First pass: visits each <div class="nocrumbs">; output is currently disabled.
walkDOMForTagAndClass(
    $doc,
    'div',
    'nocrumbs',
    function ($ele) {
        //echo $ele->ownerDocument->saveHTML($ele);
    }
);
// Second pass: print the serialized markup of each <div class="bubHead">.
walkDOMForTagAndClass(
    $doc,
    'div',
    'bubHead',
    function ($ele) {
        echo $ele->ownerDocument->saveHTML($ele);
    }
);
<?php
ini_set('display_errors', '1');
error_reporting(E_ALL);
/**
 * Recursively searches the children of $element for elements with tag name
 * $tagName and class attribute $class, passing each one to $callback.
 *
 * A matching element is handed to the callback and not searched any further;
 * non-matching elements are descended into.
 *
 * @param DOMNode  $element  Node whose childNodes are inspected.
 * @param string   $tagName  Tag name compared against DOMElement::$tagName.
 * @param string   $class    Value compared against the class attribute.
 * @param callable $callback Receives each matching element.
 */
function walkDOMForTagAndClass($element, $tagName, $class, $callback)
{
    foreach ($element->childNodes as $node) {
        // Text, comment and other non-element nodes cannot match.
        if ($node->nodeType != XML_ELEMENT_NODE) {
            continue;
        }
        $isMatch = $node->getAttribute("class") == $class
            && $node->tagName == $tagName;
        if ($isMatch) {
            $callback($node);
            continue;
        }
        walkDOMForTagAndClass($node, $tagName, $class, $callback);
    }
}
// Remote page; overridden by the local test copy assigned on the next line.
$file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV";
$file = "thread59.html";
$doc = new DOMDocument();
// Ignore every error raised while the file is being loaded.
set_error_handler(function () { /* ignore errors */});
$doc->loadHTMLFile($file);
restore_error_handler();
$body = $doc->getElementsByTagName('body')->item(0);
// Outer pass finds each <div class="nocrumbs"> under <body>;
// inner pass reports each <div class="bubHead"> beneath it.
walkDOMForTagAndClass(
    $body,
    'div',
    'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass(
            $ele,
            'div',
            'bubHead',
            function ($ele) {
                echo '.bubHead found, content: ', $ele->nodeValue, '<br>';
            }
        );
    }
);
(I set the error handler to an empty function in order to be able to use my debugger without trouble.)
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Unbenanntes Dokument</title>
</head>
<body>
    <div class="nocrumbs">
        <div class="somediv">
            <div class="bubHead">Content of 1st bubHead</div>
        </div>
    </div>
    <span class="nocrumbs">Element 4 tag not matching</span>
    <div class="somediv">
        <div class="nocrumbs">
            <div class="somediv">
                <div class="bubHead">Content of 2nd bubHead</div>
            </div>
        </div>
    </div>
    <div class="bla">Element 5 class not matching</div>
    <div class="nocrumbs">
        <div class="bubHead">Content of 3rd bubHead</div>
    </div>
</body>
</html>
<i>
</i><div class="bubHead">
<div>
<h1>Lexicon :: Strong's H0 - <em>Not Available</em>
</h1>
<?php
ini_set('display_errors', '1');
error_reporting(E_ALL);
/**
 * Recursively searches the children of $element for elements with tag name
 * $tagName and class attribute $class, passing each one to $callback.
 *
 * An empty $class acts as a wildcard: any element with the right tag name
 * matches. A matching element is not searched any further; non-matching
 * elements are descended into.
 *
 * @param DOMNode|null $element  Node whose childNodes are inspected; anything
 *                               that is not a DOMNode is ignored.
 * @param string       $tagName  Tag name compared against DOMElement::$tagName.
 * @param string       $class    Required class attribute value, or '' for any.
 * @param callable     $callback Receives each matching element.
 */
function walkDOMForTagAndClass($element, $tagName, $class, $callback)
{
    // DOMNodeList::item() returns null when there is no such node (e.g. no
    // <body> because the document failed to load); without this guard the
    // property access and foreach below raise notices/warnings.
    if (!($element instanceof DOMNode)) {
        return;
    }
    $children = $element->childNodes;
    foreach ($children as $child) {
        if ($child->nodeType == XML_ELEMENT_NODE) {
            if (($child->getAttribute("class") == $class || $class == '')
                && $child->tagName == $tagName) {
                // Matching element found, call callback function
                $callback($child);
            } else {
                walkDOMForTagAndClass($child, $tagName, $class, $callback);
            }
        }
    }
}
// Remote page; overridden by the local test copy assigned on the next line.
$file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV";
$file = "thread59.html";
$doc = new DOMDocument();
// Ignore every error raised while the file is being loaded.
set_error_handler(function () { /* ignore errors */});
$doc->loadHTMLFile($file);
restore_error_handler();
$body = $doc->getElementsByTagName('body')->item(0);
// div.nocrumbs -> div.bubHead -> h1 (any class)
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'bubHead', function($ele) {
            walkDOMForTagAndClass($ele, 'h1', '', function($ele) {
                $html = $ele->ownerDocument->saveHTML($ele);
                $html = str_replace(PHP_EOL, '', $html);
                echo 'h1 found, HTML: ' . $html . '<br>';
                // '#' is used as delimiter so the '/' in </em> needs no escaping,
                // and double quotes so the apostrophe does not end the string.
                // Group 1: the two characters after "Strong's ", group 2: the <em> text.
                $matches = array();
                preg_match("#:: Strong's (..).*<em>(.*)</em>#", $html, $matches);
                var_dump($matches);
            });
        });
    }
);
HTML used for testing:
<div class="nocrumbs">
<div class="somediv">
<div class="bubHead">Content of 1st bubHead</div>
</div>
</div>
<span class="nocrumbs">Element 4 tag not matching</span>
<div class="somediv">
<div class="nocrumbs">
<div class="somediv">
<div class="bubHead">
<div>
<h1>Lexicon :: Strong's H0 -
<em>Not Available</em>
</h1>
</div>
</div>
</div>
</div>
<div class="bla">Element 5 class not matching</div>
<div class="nocrumbs">
<div class="bubHead">Content of 3rd bubHead</div>
</div>
</div>
Notice: Trying to get property of non-object in C:. ..update_outlinetest_domdocument19.php on line 6
Warning: Invalid argument supplied for foreach() in C:. ..test_domdocument19.php on line 7[/quote]
<i>
</i> $children = $element->childNodes;
foreach ($children as $child) {
var_dump($element);
$children = $element->childNodes;
var_dump($children);
foreach ($children as $child) {
<i>
</i><?php
ini_set('display_errors', '1');
error_reporting(E_ALL);
/**
 * Debugging variant of the recursive walker: dumps $element on every call,
 * then searches its children for elements with tag name $tagName and class
 * attribute $class (an empty $class matches any class).
 *
 * A matching element is passed to $callback and not searched further;
 * non-matching elements are descended into.
 *
 * @param DOMNode  $element  Node whose childNodes are inspected.
 * @param string   $tagName  Tag name compared against DOMElement::$tagName.
 * @param string   $class    Required class attribute value, or '' for any.
 * @param callable $callback Receives each matching element.
 */
function walkDOMForTagAndClass($element, $tagName, $class, $callback)
{
    var_dump($element);
    foreach ($element->childNodes as $node) {
        // Only element nodes can match or be descended into.
        if ($node->nodeType != XML_ELEMENT_NODE) {
            continue;
        }
        $classMatches = ($node->getAttribute("class") == $class || $class == '');
        if ($classMatches && $node->tagName == $tagName) {
            $callback($node);
            continue;
        }
        walkDOMForTagAndClass($node, $tagName, $class, $callback);
    }
}
//$file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV";
$file = "test_thishtml7.html";
$doc = new DOMDocument();
// Ignore every error raised while the file is being loaded.
set_error_handler(function () { /* ignore errors */});
$doc->loadHTMLFile($file);
restore_error_handler();
$body = $doc->getElementsByTagName('body')->item(0);
// div.nocrumbs -> div.bubHead -> h1 (any class)
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'bubHead', function($ele) {
            walkDOMForTagAndClass($ele, 'h1', '', function($ele) {
                $html = $ele->ownerDocument->saveHTML($ele);
                $html = str_replace(PHP_EOL, '', $html);
                echo 'h1 found, HTML: ' . $html . '<br>';
                // '#' delimiter avoids clashing with the '/' in </em>; double
                // quotes keep the apostrophe from terminating the string.
                $matches = array();
                preg_match("#:: Strong's (..).*<em>(.*)</em>#", $html, $matches);
                var_dump($matches);
            });
        });
    }
);
// Dump the PHP configuration (diagnostic aid).
phpinfo();
?>
<i>
</i><?php
ini_set('display_errors', '1');
error_reporting(E_ALL);
/**
 * Debugging variant of the recursive walker: dumps $element on every call,
 * then searches its children for elements with tag name $tagName and class
 * attribute $class (an empty $class matches any class).
 *
 * A matching element is passed to $callback and not searched further;
 * non-matching elements are descended into.
 *
 * @param DOMNode|null $element  Node whose childNodes are inspected; anything
 *                               that is not a DOMNode is dumped and ignored.
 * @param string       $tagName  Tag name compared against DOMElement::$tagName.
 * @param string       $class    Required class attribute value, or '' for any.
 * @param callable     $callback Receives each matching element.
 */
function walkDOMForTagAndClass($element, $tagName, $class, $callback)
{
    var_dump($element);
    // A null $element (e.g. no <body> because the file could not be loaded)
    // would otherwise trigger "property of non-object" and an invalid foreach.
    if (!($element instanceof DOMNode)) {
        return;
    }
    $children = $element->childNodes;
    foreach ($children as $child) {
        if ($child->nodeType == XML_ELEMENT_NODE) {
            if (($child->getAttribute("class") == $class || $class == '')
                && $child->tagName == $tagName) {
                // Matching element found, call callback function
                $callback($child);
            } else {
                walkDOMForTagAndClass($child, $tagName, $class, $callback);
            }
        }
    }
}
//$file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV";
$file = "test_thishtml7.html";
$doc = new DOMDocument();
// Ignore every error raised while the file is being loaded.
set_error_handler(function () { /* ignore errors */});
$doc->loadHTMLFile($file);
restore_error_handler();
$body = $doc->getElementsByTagName('body')->item(0);
// div.nocrumbs -> div.bubHead -> h1 (any class)
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'bubHead', function($ele) {
            walkDOMForTagAndClass($ele, 'h1', '', function($ele) {
                $html = $ele->ownerDocument->saveHTML($ele);
                $html = str_replace(PHP_EOL, '', $html);
                echo 'h1 found, HTML: ' . $html . '<br>';
                // '#' delimiter avoids clashing with the '/' in </em>; double
                // quotes keep the apostrophe from terminating the string.
                $matches = array();
                preg_match("#:: Strong's (..).*<em>(.*)</em>#", $html, $matches);
                var_dump($matches);
            });
        });
    }
);
//phpinfo();
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title></title>
</head>
<body>
</body>
</html>
<i>
</i> <div class="columns small-12 table-styles">
<div class="lexicon-label">Root Word (Etymology)</div>
<div> A root </div>
</div>
<div id="outlineBiblical" class="__hidden"><div> <ol><li><p>father of an individual</li><li><p>of God as father of his people</li><li><p>head or founder of a household, group, family, or clan</li><li><p>ancestor<ol><li><p>grandfather, forefathers &#8212; of person</li><li><p>of people</li></ol></li><li><p>originator or patron of a class, profession, or art</li><li><p>of producer, generator (fig.)</li><li><p>of benevolence and protection (fig.)</li><li><p>term of respect and honour</li><li><p>ruler or chief (spec.)</li></ol></div></div>
<div class="lexStrongsDef"> <span class="Hb">אָב</span> <span class="strgtrans">ʼâb,</span> awb; a primitive word; father, in a literal and immediate, or figurative and remote application:&#8212;chief, (fore-) father(-less), <span class="strongsEcks"><a class="lexpop" rel="lexicon.strongsLegend">&#215;</a></span> patrimony, principal. Compare names in &#39;Abi-&#39;.</div>
// Build the lexicon URL from $let and $s, which are defined outside this
// snippet (presumably the Strong's prefix letter and number — confirm with caller).
$file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$let.$s."&t=KJV";
//$file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV"; //thread59.html
$doc = new DOMDocument();
// Ignore every error raised while the page is being loaded.
set_error_handler(function () { /* ignore errors */});
$doc->loadHTMLFile($file_link);
restore_error_handler();
$body = $doc->getElementsByTagName('body')->item(0);
// div.nocrumbs -> div.bubHead -> h1 (any class)
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'bubHead', function($ele) {
            walkDOMForTagAndClass($ele, 'h1', '', function($ele) {
                $html = $ele->ownerDocument->saveHTML($ele);
                $html = str_replace(PHP_EOL, '', $html);
                echo 'h1 found, HTML: ' . $html . '<br>';
                // '#' delimiter avoids clashing with the '/' in </em>; double
                // quotes keep the apostrophe from terminating the string.
                if (!preg_match("#:: Strong's (..).*<em>(.*)</em>#", $html, $matches)) {
                    // Heading does not have the expected shape; nothing to report.
                    return;
                }
                //var_dump($matches);
                $blbstrongs = $matches[1];
                $blbtransliteration = $matches[2];
                echo "blbstrongs: ".$blbstrongs."<br />\n";
                echo "blbtransliteration: ".$blbtransliteration."<br />\n";
                //array_push($blbgroup, array($blbstrongs, $blbtransliteration));
            });
        });
    }
);
//var_dump($blbgroup);
// to pick up the rest of the information
// div.nocrumbs -> div.columns.small-12.table-styles -> any div
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'columns small-12 table-styles', function($ele) {
            walkDOMForTagAndClass($ele, 'div', '', function($ele) {
                $html = $ele->ownerDocument->saveHTML($ele);
                $html = str_replace(PHP_EOL, '', $html);
                echo 'div found, HTML: ' . $html . '<br>';
                // Per match: [1] opening tag name, [2] inner content, [3] closing tag name.
                // Single quotes keep the \b word-boundary escapes intact.
                preg_match_all('#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si', $html, $divmatches, PREG_SET_ORDER);
                //preg_match('<div>(.*)</div>/', $html, $divmatches);
                echo "-----<br /><br />\n";
                echo 'divmatches <pre style="color: blue; font-weight: bold;">';
                var_dump($divmatches);
                echo "</pre> ";
                echo "<br /><br />\n";
                echo "--------------------<br /><br />\n";
                // Fall back to an empty description when no <div> was matched.
                $blbdescr = isset($divmatches[0][2]) ? $divmatches[0][2] : '';
                //$blbtransliteration = $matches[2];
                echo '<span style="color: purple; font-weight: bold;">blbdescr: ' . $blbdescr . "</span><br />\n";
                //echo "blbtransliteration: ".$blbtransliteration."<br />n";
                //array_push($blbgroup, array($blbstrongs, $blbtransliteration));
            });
        });
    }
);

//var_dump($blbgroup);
// Report the Greek heading, if the page has one.
walkDOMForTagAndClass($body, 'h6', 'lexTitleGk',
    function ($ele) {
        echo 'h6 found, content greek: ' . $ele->nodeValue . '<br>';
    });
// Report the Hebrew heading, if the page has one.
walkDOMForTagAndClass($body, 'h6', 'lexTitleHb',
    function ($ele) {
        echo 'h6 found, content hebrew: ' . $ele->nodeValue . '<br>';
    });
// div.nocrumbs -> div.columns.small-12.table-styles, then pick values out of
// the serialized markup with regular expressions.
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'columns small-12 table-styles', function ($ele) {
            $html = $ele->ownerDocument->saveHTML($ele);
            var_dump($html);
            // check for root word
            // '#' delimiter: the pattern contains '/' in </div>. The parentheses
            // around "Etymology" are escaped so they match literally and do not
            // add a capture group (which would break the count == 2 test below).
            preg_match('#<div class="lexicon-label">Root Word \(Etymology\)</div>.*<div>(.*)</div>#isU', $html, $matches);
            var_dump($matches);
            if (count($matches) == 2) {
                echo 'Root Word: ' . $matches[1];
            }
            // check for strongs definition
            preg_match('#<div class="lexStrongsDef">(.*)</div>#isU', $html, $matches);
            var_dump($matches);
            if (count($matches) == 2) {
                echo 'strongs definition: ' . $matches[1];
            }
        });
    });
Check if it fits your needs.
$children = $element->childNodes;
foreach ($children as $child) {
if ($child->nodeType == XML_ELEMENT_NODE) {
if (($child->getAttribute("class") == $class || $class == '')
&& $child->tagName == $tagName) {
// Matching element found, call callback function
$callback($child);
} else {
walkDOMForTagAndClass($child, $tagName, $class, $callback);
}
}
}
// Build the lexicon URL from $let and $s, which are defined outside this
// snippet (presumably the Strong's prefix letter and number — confirm with caller).
$file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$let.$s."&t=KJV";
//$file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV"; //thread59.html
$doc = new DOMDocument();
// Ignore every error raised while the page is being loaded.
set_error_handler(function () { /* ignore errors */});
$doc->loadHTMLFile($file_link);
restore_error_handler();
$body = $doc->getElementsByTagName('body')->item(0);
// div.nocrumbs -> div.bubHead -> h1 (any class)
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'bubHead', function($ele) {
            walkDOMForTagAndClass($ele, 'h1', '', function($ele) {
                $html = $ele->ownerDocument->saveHTML($ele);
                $html = str_replace(PHP_EOL, '', $html);
                //echo 'h1 found, HTML: ' . $html . '<br>';
                // '#' delimiter avoids clashing with the '/' in </em>; double
                // quotes keep the apostrophe from terminating the string.
                if (!preg_match("#:: Strong's (..).*<em>(.*)</em>#", $html, $matches)) {
                    // Heading does not have the expected shape; nothing to report.
                    return;
                }
                //var_dump($matches);
                $blbstrongs = $matches[1];
                $blbtransliteration = $matches[2];
                echo "blbstrongs: ".$blbstrongs."<br />\n";
                echo "blbtransliteration: ".$blbtransliteration."<br />\n";
                //array_push($blbgroup, array($blbstrongs, $blbtransliteration));
            });
        });
    }
);
//var_dump($blbgroup);
// to pick up the rest of the information
// div.nocrumbs -> div.columns.small-12.table-styles -> any div
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'columns small-12 table-styles', function($ele) {
            walkDOMForTagAndClass($ele, 'div', '', function($ele) {
                $html = $ele->ownerDocument->saveHTML($ele);
                $html = str_replace(PHP_EOL, '', $html);
                //echo 'div found, HTML: ' . $html . '<br>';
                // Per match: [1] opening tag name, [2] inner content, [3] closing tag name.
                // Single quotes keep the \b word-boundary escapes intact.
                preg_match_all('#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si', $html, $divmatches, PREG_SET_ORDER);
                //preg_match('<div>(.*)</div>/', $html, $divmatches);
                echo "-----<br /><br />\n";
                echo 'divmatches: <pre style="color: blue; font-weight: bold;">';
                var_dump($divmatches);
                echo "</pre> ";
                echo "<br /><br />\n";
                echo "--------------------<br /><br />\n";
                // Fall back to an empty description when no <div> was matched.
                $blbdescr = isset($divmatches[0][2]) ? $divmatches[0][2] : '';
                //$blbtransliteration = $matches[2];
                echo '<span style="color: purple; font-weight: bold;">blbdescription: ' . $blbdescr . "</span><br />\n";
                //echo "blbtransliteration: ".$blbtransliteration."<br />n";
                //array_push($blbgroup, array($blbstrongs, $blbtransliteration));
            });
        });
    }
);
//var_dump($blbgroup);
// Report the Greek heading, if the page has one.
walkDOMForTagAndClass($body, 'h6', 'lexTitleGk',
    function ($ele) {
        echo 'h6 found, content greek: ' . $ele->nodeValue . '<br>';
        echo "<hr /><br /><br />\n";
    });
// Report the Hebrew heading, if the page has one.
walkDOMForTagAndClass($body, 'h6', 'lexTitleHb',
    function ($ele) {
        echo 'h6 found, content hebrew: ' . $ele->nodeValue . '<br>';
        echo "<hr /><br /><br />\n";
    });
// div.nocrumbs -> div.columns.small-12.table-styles, then pick values out of
// the serialized markup with regular expressions.
walkDOMForTagAndClass($body, 'div', 'nocrumbs',
    function ($ele) {
        walkDOMForTagAndClass($ele, 'div', 'columns small-12 table-styles', function ($ele) {
            $html = $ele->ownerDocument->saveHTML($ele);
            var_dump($html);
            // check for root word
            // '#' delimiter: the pattern contains '/' in </div>. The parentheses
            // around "Etymology" are escaped so they match literally and do not
            // add a capture group (which would break the count == 2 test below).
            preg_match('#<div class="lexicon-label">Root Word \(Etymology\)</div>.*<div>(.*)</div>#isU', $html, $etymmatches);
            echo '<pre style="color: red;">';
            var_dump($etymmatches);
            echo "</pre>";
            if (count($etymmatches) == 2) {
                echo 'Root Word (etymmatches): <span style="font-weight: bold;">' . $etymmatches[1] . "</span>";
                echo "<hr /><br /><br />\n";
            }
            // check for strongs definition
            preg_match('#<div class="lexStrongsDef">(.*)</div>#isU', $html, $defmatches);
            echo '<pre style="color: green;">';
            var_dump($defmatches);
            echo "</pre>";
            if (count($defmatches) == 2) {
                echo 'strongs definition (defmatches): <span style="font-weight: bold;">' . $defmatches[1] . "</span>";
                echo "<hr /><br /><br />\n";
            }
        });
    });