www.webdeveloper.com
Results 1 to 5 of 5

Thread: Printing array values | Meta Tag Analyzer

  1. #1
    Join Date
    Jun 2008
    Location
    Europe
    Posts
    1,086

    Printing array values | Meta Tag Analyzer

    I have an array that I would like to print out.

    I have a test form up here that will analyze a given site's meta tags. I would like to simply get the meta-tags, and then list them or put them into a table...

    So far, here is what I have:
    http://metataggenerator.org/analyzer/

    You can enter a URL into the box and the script will print out the meta tags in a list or table.

    The code thus far:
    PHP Code:
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>Meta Tag Analyzer</title>

    </head>

    <body>
    <form action="test.php" method="post">
    <input type="text" name="url"  onFocus="this.value=''; this.onfocus=null;" onblur="this.value='Enter your URL here';" value="Enter your URL here" />
    <input type="submit" value="Analyze Meta Tags" />
    </form><br />
    <?php
    // Read the submitted URL. The key is absent on the first (GET) page load and
    // could be an array if the request is forged, so fall back to an empty string
    // instead of raising a notice or passing a non-string on.
    $url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
    // Drop the form's placeholder text so an untouched field counts as "no URL".
    $url = str_replace("Enter your URL here", '', $url);
    if ($url == '') {
        echo "Enter an URL to analyze above";
        echo "\n";
    } else {
        // Add http:// only when the user did not type a scheme. Prefixing
        // unconditionally and then stripping "http://http://" turned every
        // https:// address into the unusable "http://https://...".
        if (!preg_match('#^https?://#i', $url)) {
            $url = "http://$url";
        }
    /**
     * Fetch a page and extract its <title> text and its named meta tags.
     *
     * @param string $url Address (or local path) readable by file_get_contents().
     * @return array|false False when the page could not be fetched; otherwise an
     *                     array with keys 'title' (string, or null when no <title>
     *                     matched) and 'metaTags' (array keyed by the meta tag's
     *                     name, each entry holding 'html' => the entity-encoded
     *                     original tag and 'value' => its content attribute).
     */
    function getUrlData($url)
    {
        $result = false;

        $contents = getUrlContents($url);
        if (isset($contents) && is_string($contents))
        {
            $title = null;
            $metaTags = null;

            preg_match('/<title>([^>]*)<\/title>/si', $contents, $match);

            if (isset($match) && is_array($match) && count($match) > 0)
            {
                $title = strip_tags($match[1]);
            }

            // Group 1 captures the name attribute, group 2 the content attribute.
            preg_match_all(
                '/<[\s]*meta[\s]*name="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si',
                $contents,
                $match
            );

            if (isset($match) && is_array($match) && count($match) == 3)
            {
                $originals = $match[0];
                $names = $match[1];
                $values = $match[2];

                if (count($originals) == count($names) && count($names) == count($values))
                {
                    $metaTags = array();

                    for ($i = 0, $limiti = count($names); $i < $limiti; $i++)
                    {
                        // A later tag with the same name overwrites an earlier one.
                        $metaTags[$names[$i]] = array(
                            'html' => htmlentities($originals[$i]),
                            'value' => $values[$i]
                        );
                    }
                }
            }

            $result = array(
                'title' => $title,
                'metaTags' => $metaTags
            );
        }

        return $result;
    }

    /**
     * Download a page, following <meta http-equiv="refresh"> redirects.
     *
     * @param string   $url                 Address to fetch.
     * @param int|null $maximumRedirections Maximum number of meta-refresh hops to
     *                                      follow; null means no limit (a page that
     *                                      refreshes to itself would then recurse
     *                                      without end).
     * @param int      $currentRedirection  Hops followed so far (used by the recursion).
     * @return string|false Page contents, or false when the fetch failed or the
     *                      redirect limit was reached.
     */
    function getUrlContents($url, $maximumRedirections = null, $currentRedirection = 0)
    {
        $result = false;

        // Fetch failures are deliberately silenced; they surface as a false return.
        $contents = @file_get_contents($url);

        // Check if we need to go somewhere else
        if (isset($contents) && is_string($contents))
        {
            preg_match_all(
                '/<[\s]*meta[\s]*http-equiv="?REFRESH"?' . '[\s]*content="?[0-9]*;[\s]*URL[\s]*=[\s]*([^>"]*)"?' . '[\s]*[\/]?[\s]*>/si',
                $contents,
                $match
            );

            if (isset($match) && is_array($match) && count($match) == 2 && count($match[1]) == 1)
            {
                if (!isset($maximumRedirections) || $currentRedirection < $maximumRedirections)
                {
                    return getUrlContents($match[1][0], $maximumRedirections, ++$currentRedirection);
                }

                // Redirect limit reached: report failure rather than the redirect stub.
                $result = false;
            }
            else
            {
                $result = $contents;
            }
        }

        // Return $result, not $contents: returning $contents made the redirect
        // limit above a no-op because the stub page was handed back anyway.
        return $result;
    }

    // Fetch the page and extract its title and named meta tags.
    $result = getUrlData($url);

    // getUrlData() returns false when the page could not be read, and 'metaTags'
    // may be null; normalise both so the output code below never indexes a non-array.
    $title = (is_array($result) && isset($result['title'])) ? $result['title'] : '';
    $metaTags = (is_array($result) && isset($result['metaTags']) && is_array($result['metaTags']))
        ? $result['metaTags']
        : array();

    echo '<hr />';
    echo '<h1>Results</h1>';
    echo "<b>Results for:</b> ";
    echo '<font color="red" face="courier">';
    // The URL is user input: escape it before writing it into the page.
    echo htmlspecialchars($url, ENT_QUOTES);
    echo '</font>';
    echo '<br />';
    echo "&lt;title&gt;";
    echo htmlspecialchars($title, ENT_QUOTES);
    echo '&lt;/title&gt;';
    echo '<br /><font size="-1">';
    // Array keys must be quoted strings; bare words such as $result[title] are an
    // error on PHP 8. The 'html' entries were entity-encoded by getUrlData().
    foreach (array('description', 'keywords', 'Charset') as $tagName) {
        echo isset($metaTags[$tagName]['html']) ? $metaTags[$tagName]['html'] : '';
        echo '<br />';
    }
    echo '<font color="red" face="arial">&uarr; <i>This is what I would like the results page to look like, except with the full listing of all metatags on the page</i></font>';
    echo '<hr />';
    echo 'Here is a listing of the types of tags that appear on the page, but I cannot figure out how to extract the HTML array and display it...';

    // One table row per meta tag: name in the first cell, original markup in the
    // second. foreach replaces reset()/each(), which no longer exists in PHP 8.
    // Each value is itself an array, so print its 'html' element rather than the
    // array (which rendered as the literal text "Array").
    echo "<table>";
    foreach ($metaTags as $tagName => $tag) {
        $tagHtml = isset($tag['html']) ? $tag['html'] : '';
        echo "<tr><td>" . htmlspecialchars($tagName, ENT_QUOTES) . "</td><td>$tagHtml</td></tr>\n";
    }
    // end the table
    echo "</table>";
    //end test

    echo '<hr />';

    echo '</font>';
    /////////////////
    // Debug dump. The 'value' entries come straight from the fetched page, so
    // escape the dump; double_encode=false keeps the already-encoded 'html'
    // entries displaying as before.
    echo '<pre>';
    echo htmlspecialchars(print_r($metaTags, true), ENT_QUOTES, 'UTF-8', false);
    echo '</pre>';
    }

    ?>
    </body>
    </html>
    Last edited by donatello; 05-27-2011 at 11:46 AM.

  2. #2
    Join Date
    Aug 2004
    Location
    Ankh-Morpork
    Posts
    18,912
    I'd recommend using something like the DOM functions to get the data, e.g.:
    PHP Code:
    <?php
    /**
     * Get title and meta info from URL
     *
     * Downloads the page, parses it with DOMDocument and collects the text of the
     * first <title> element plus the attributes of every <meta> element.
     *
     * @param string $url URL to parse
     * @return mixed Array on success, else false. The array always has a 'meta'
     *               key (one attribute-name => value array per <meta> element that
     *               has attributes) and has a 'title' key only when the page
     *               contains a <title> element.
     */
    function getMeta($url)
    {
       $content = file_get_contents($url);
       // Compare explicitly: empty() would also reject a page whose body is "0".
       if ($content === false || $content === '') {
          user_error("Unable to get page");
          return false;
       }

       $dom = new DOMDocument();
       // Real-world markup is rarely valid, so libxml's parse warnings are silenced.
       // Test loadHTML()'s return value: the DOMDocument object itself is never
       // "empty", so checking empty($dom) could not detect a failed parse.
       if (!@$dom->loadHTML($content)) {
          user_error("Unable to parse text");
          return false;
       }

       // Start with an empty list so callers can loop over 'meta' even when the
       // page has no meta tags.
       $data = array('meta' => array());

       $titles = $dom->getElementsByTagName('title');
       if ($titles->length > 0) {
          // should only be one; take the first
          $data['title'] = $titles->item(0)->textContent;
       }

       foreach ($dom->getElementsByTagName('meta') as $meta) {
          if (!$meta->hasAttributes()) {
             continue;
          }
          $tagData = array();
          foreach ($meta->attributes as $attr) {
             $tagData[$attr->name] = $attr->value;
          }
          $data['meta'][] = $tagData;
       }

       return $data;
    }
    // TEST
    $metaData = getMeta("http://www.ebookworm.us/");
    // getMeta() returns false on failure and may omit keys; normalise before output.
    $pageTitle = (is_array($metaData) && !empty($metaData['title'])) ? $metaData['title'] : '';
    $metaTags = (is_array($metaData) && isset($metaData['meta']) && is_array($metaData['meta']))
       ? $metaData['meta']
       : array();

    echo "<p><b>Title:</b> ";
    // Everything below comes from a remote page, so escape it before printing.
    echo $pageTitle !== '' ? htmlspecialchars($pageTitle, ENT_QUOTES) : "[none]";
    echo "</p>\n";
    echo "<p><b>Meta Tags:</b></p>\n<ul>\n";
    foreach ($metaTags as $meta) {
       echo "<li>";
       foreach ($meta as $name => $content) {
          echo "<b>" . htmlspecialchars($name, ENT_QUOTES) . ":</b> "
             . htmlspecialchars($content, ENT_QUOTES) . "<br />\n";
       }
       echo "</li>\n";
    }
    echo "</ul>\n";
    "Please give us a simple answer, so that we don't have to think, because if we think, we might find answers that don't fit the way we want the world to be."
    ~ Terry Pratchett in Nation

    eBookworm.us

  3. #3
    Join Date
    Jun 2008
    Location
    Europe
    Posts
    1,086

    Smile

    Wow... thanks for that...

    Damn, you're fast!

    I'm testing it now, so far it works better than what I was tinkering with and I think with a few tweaks I will be back marking this as resolved...


    Thanks a bunch!

  4. #4
    Join Date
    Jun 2008
    Location
    Europe
    Posts
    1,086
    Here is what I was able to do... almost there, but since I want to represent the meta tags exactly as they appear and detect if they are HTML or xhtml... I need to grab the right bracket with the slash if it appears in xhtml or without if in plain HTML.

    See here:
    http://metataggenerator.org/analyzer/TEST.php

    I've gotten the output to look exactly how I like, except that I have manually re-added the trailing " />

    This does not exactly show what is in the source code...
    that is, for example, if a metatag appears like this:
    Code:
    <meta name="robots" content="index, follow">
    This script will show it like this:
    Code:
    <meta name="robots" content="index, follow" />
    ...with the trailing slash for xhtml...

    any ideas???

    Here is the code on the page now...
    PHP Code:
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>Meta Tag Analyzer</title>

    </head>

    <body>
    <form action="TEST.php" method="post">
    <input type="text" name="url" />
    <input type="submit" value="Analyze Meta Tags" />
    </form><br />
    <?php
    // Read the submitted URL. The key is absent on the first (GET) page load and
    // could be an array if the request is forged, so fall back to an empty string
    // instead of raising a notice or passing a non-string on.
    $url = (isset($_POST['url']) && is_string($_POST['url'])) ? trim($_POST['url']) : '';
    // Drop the old form placeholder text so it is never treated as a URL.
    $url = str_replace("Enter your URL here", '', $url);
    if ($url == '') {
        echo '&uarr;Enter an URL to analyze above.<br /><br />';
        echo 'If you are testing it for me from WebDeveloper.com, you can use this url: ';
        include('TEST2.html');
        echo "\n";
    } else {
        // Add http:// only when the user did not type a scheme. Prefixing
        // unconditionally and then stripping "http://http://" turned every
        // https:// address into the unusable "http://https://...".
        if (!preg_match('#^https?://#i', $url)) {
            $url = "http://$url";
        }

    /**
     * Get title and meta info from URL
     *
     * Downloads the page, parses it with DOMDocument and collects the text of the
     * first <title> element plus the attributes of every <meta> element.
     *
     * @param string $url URL to parse
     * @return mixed Array on success, else false. The array always has a 'meta'
     *               key (one attribute-name => value array per <meta> element that
     *               has attributes) and has a 'title' key only when the page
     *               contains a <title> element.
     */
    function getMeta($url)
    {
       $content = file_get_contents($url);
       // Compare explicitly: empty() would also reject a page whose body is "0".
       if ($content === false || $content === '') {
          user_error("Unable to get page");
          return false;
       }

       $dom = new DOMDocument();
       // Real-world markup is rarely valid, so libxml's parse warnings are silenced.
       // Test loadHTML()'s return value: the DOMDocument object itself is never
       // "empty", so checking empty($dom) could not detect a failed parse.
       if (!@$dom->loadHTML($content)) {
          user_error("Unable to parse text");
          return false;
       }

       // Start with an empty list so callers can loop over 'meta' even when the
       // page has no meta tags.
       $data = array('meta' => array());

       $titles = $dom->getElementsByTagName('title');
       if ($titles->length > 0) {
          // should only be one; take the first
          $data['title'] = $titles->item(0)->textContent;
       }

       foreach ($dom->getElementsByTagName('meta') as $meta) {
          if (!$meta->hasAttributes()) {
             continue;
          }
          $tagData = array();
          foreach ($meta->attributes as $attr) {
             $tagData[$attr->name] = $attr->value;
          }
          $data['meta'][] = $tagData;
       }

       return $data;
    }

    // TEST
    $metaData = getMeta($url);
    // getMeta() returns false on failure and may omit keys; normalise before output.
    $pageTitle = (is_array($metaData) && !empty($metaData['title'])) ? $metaData['title'] : '';
    $metaTags = (is_array($metaData) && isset($metaData['meta']) && is_array($metaData['meta']))
       ? $metaData['meta']
       : array();

    // Open the paragraph that the "</p>" below closes (it was previously unmatched).
    echo '<p>&lt;title&gt;';
    // Everything below comes from a remote page, so escape it before printing.
    echo $pageTitle !== '' ? htmlspecialchars($pageTitle, ENT_QUOTES) : "[none]";
    echo '&lt;/title&gt;';
    echo "</p>\n";
    echo "<p><b>Meta Tags:</b></p>\n<table>\n";
    foreach ($metaTags as $meta) {
       echo "<tr><td>&lt;meta ";
       foreach ($meta as $name => $content) {
          echo htmlspecialchars($name, ENT_QUOTES) . "=&quot;" . htmlspecialchars($content, ENT_QUOTES) . "&quot;\n";
       }
       // NOTE: the parsed DOM no longer records whether the source tag ended in
       // ">" or "/>", so the XHTML-style closing is always printed here.
       echo ' /&gt;</td></tr>';
    }
    echo "</table>\n";
    }
    ?>
    </body>
    </html>

  5. #5
    Join Date
    Jun 2008
    Location
    Europe
    Posts
    1,086
    The latest iteration of the script is now here:
    http://www.metataggenerator.org/analyzer/

    I will try to figure out some way of checking if the source page is HTML or xhtml and whether or not the meta tags contain the closing slash...


    The script now displays the page's meta tags, the Facebook Open Graph Tags and if the 4 mandatory ones are present, if the page has a sitemap.xml file and a robots.txt file...

    Trying to think of other useful tricks...

Thread Information

Users Browsing this Thread

There are currently 1 users browsing this thread. (0 members and 1 guests)

Tags for this Thread

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
HTML5 Development Center



Recent Articles