hi,

I am trying to validate a huge XML document with perhaps 10 lakh (1,000,000) lines in it.

The problem occurs when a validation error is located beyond line 65535.

I think there is a problem or limitation in the libxml2 library. Please help me.

I have attached my PHP script below.

-------------------------- < PHP SCRIPT > --------------------------------------------

<?php
// Validating a large XML file takes a while, so raise the execution time limit.
ini_set('max_execution_time', 3000);
// Uncomment to lift the memory limit as well ('-1' means unlimited):
//ini_set('memory_limit', '-1');

// Separator appended after every collected error message.
define("LIMIT", "<br/>");

$error   = ""; // all validation messages, concatenated
$count   = 0;  // number of <Artikel> elements processed
$line_no = 0;  // line number of the <Artikel> currently being validated

// Expected layout of one <Artikel>.
// Each leaf is "<presence>_<type>[_<max length>]":
//   presence: "o" = optional, "c" = required
//   type:     varchar | Decimal | boolean | int | url | date
// A nested array describes the children of a container element.
$formats = array(
    "Herstellernummer"   => "o_varchar_255",
    "Herstellername"     => "c_varchar_255",
    "Eigenmarke"         => "o_boolean", // 0 or 1 only
    "ID"                 => "c_varchar_255",
    "Name"               => "c_varchar_255",
    "Beschreibung"       => "o_varchar_12255",
    "Anzahl"             => "o_Decimal",
    "Packungseinheit"    => "o_varchar_12255",
    "Packungsmenge"      => "o_Decimal",
    "Basismengeneinheit" => "o_varchar_12255",
    "MwSt"               => "o_int",
    "EAN"                => "o_varchar_255",
    "HIBC"               => "o_varchar_255",
    "PZN"                => "o_varchar_255",
    "Bild1URL"           => "o_url",
    "DatenblattURL"      => "o_url",
    "ExtraDateiURL"      => "o_url",
    "Groesse"            => "o_varchar_100",
    "Geschlecht"         => "o_varchar_100",
    "Farbe"              => "o_varchar_100",
    "Material"           => "o_varchar_100",
    "Kategorien"         => array(
        "Kategorie" => "o_varchar_100"
    ),
    "ZugehoerigeArtikel" => array(
        "ID" => "o_varchar_255"
    ),
    "Angebote"           => array(
        "Angebot" => array(
            "Bestellnummer"       => "c_varchar_255",
            "GueltigAb"           => "o_date",
            "GueltigBis"          => "o_date",
            "MinBestellmenge"     => "c_int",
            "Preis"               => "c_Decimal",
            "Rabattfaehig"        => "o_boolean",
            "Beschaffungsartikel" => "o_int",
            "Lagerstatus"         => "c_boolean"
        )
    )
);
// Reading the XML Document as a forward-only stream.
$xml_read = new XMLReader;

// Other input files that were used with this script:
//   xml2wwb_20130323_001626.xml
//   xml2wwb_eas_20130405_162920.xml
//   xml2wwb_20130215_214835.xml

// Without LIBXML_BIGLINES libxml2 cannot report line numbers above 65535.
// The constant only exists when PHP is built against libxml2 >= 2.9.0, so
// fall back to "no flags" when it is missing.
$xml_read_flags = defined('LIBXML_BIGLINES') ? LIBXML_BIGLINES : 0;

// open() returns false when the file cannot be opened; stop here instead of
// failing later on the first read().
if (!$xml_read->open('sample.xml', null, $xml_read_flags)) {
    exit("Unable to open the XML document 'sample.xml'");
}

// Owner document into which each <Artikel> subtree is imported before it is
// handed to SimpleXML.
$doc = new DOMDocument;
//echo $xml_read->expand()->getLineNo()."<br/>";
/**
 * Tells whether a string parses as XML without libxml reporting anything.
 *
 * Only well-formedness is checked; no DTD or schema validation takes place.
 * The libxml error buffer is cleared before and after parsing so that
 * problems from an earlier document cannot leak into this result, and the
 * caller's libxml_use_internal_errors() setting is restored on exit.
 *
 * @param string $xml XML source text.
 * @return bool true when the text was loaded and libxml recorded no errors.
 */
function is_valid_xml ( $xml ) {
    // An empty source can never be a document, and DOMDocument::loadXML()
    // rejects it outright on current PHP versions.
    if ( $xml === null || $xml === '' ) {
        return false;
    }

    $previous_setting = libxml_use_internal_errors( true );
    libxml_clear_errors();

    $doc    = new DOMDocument('1.0', 'utf-8');
    $loaded = $doc->loadXML( $xml );
    $errors = libxml_get_errors();

    // Leave libxml exactly as it was found.
    libxml_clear_errors();
    libxml_use_internal_errors( $previous_setting );

    return $loaded !== false && empty( $errors );
}


// Advance the reader to the first <Artikel> element (or to the end of the
// document when there is none).
while ($xml_read->read() && $xml_read->name !== 'Artikel') {
    // read() itself moves the cursor; nothing else to do
}

// Validate one <Artikel> subtree at a time so that the whole document never
// has to be loaded at once.
while ($xml_read->name === 'Artikel') {
    $count++;

    // expand() is called once and reused. It returns false when the subtree
    // cannot be parsed, in which case continuing would only crash.
    $artikel_node = $xml_read->expand();
    if ($artikel_node === false) {
        $error .= "Artikel no ".$count." could not be parsed - validation stopped".LIMIT;
        break;
    }

    $line_no = $artikel_node->getLineNo();
    $Artikel = simplexml_import_dom($doc->importNode($artikel_node, true));

    // Walk the format table (at most three levels deep) and check the
    // matching node of the current Artikel for every leaf entry.
    // NOTE(review): for elements that occur several times (e.g. <Angebot>)
    // presumably only the first occurrence is checked - confirm.
    foreach ($formats as $f1_key => $f1_value) {
        if (is_array($f1_value)) { // checking for inner node
            foreach ($f1_value as $f2_key => $f2_value) {
                if (is_array($f2_value)) { // checking for inner node
                    foreach ($f2_value as $f3_key => $f3_value) {
                        $c_key = isset($Artikel->$f1_key->$f2_key->$f3_key) ? $Artikel->$f1_key->$f2_key->$f3_key : null;
                        do_or_die($c_key, $f3_value, array($f3_key, $f2_key, $f1_key));
                    }
                } else {
                    $c_key = isset($Artikel->$f1_key->$f2_key) ? $Artikel->$f1_key->$f2_key : null;
                    do_or_die($c_key, $f2_value, array($f2_key, $f1_key));
                }
            }
        } else {
            $c_key = isset($Artikel->$f1_key) ? $Artikel->$f1_key : null;
            do_or_die($c_key, $f1_value, array($f1_key));
        }
    }

    // Skip the rest of this subtree and move to the next <Artikel> sibling.
    $xml_read->next('Artikel');
}

// The reader is not used after this point; release the underlying file.
$xml_read->close();


/* starts predefined functions */


/**
 * Validates one node of the current Artikel, or records that it is missing.
 *
 * @param mixed  $key       The node taken from the Artikel, or null when it does not exist.
 * @param string $value     Format string from the format table (e.g. "c_varchar_255").
 * @param array  $attribute Element names identifying the node, used in messages.
 */
function do_or_die($key,$value,$attribute=null)
{
    global $error, $line_no;

    if ($key === null) {
        // for missing XML data
        $error .= echo_attr($attribute)." not present for Artikel @ line no ".$line_no.LIMIT;
        return;
    }

    data_check($key, $value, $attribute);
}

// Print how many <Artikel> elements the loop above has processed.
echo "No of Artikel in the XML Document : ", $count;
/**
 * Applies the presence rule of a format string and, when the node has
 * content, passes it on to the data type check.
 *
 * The first "_"-separated part of $format decides the rule: "o" nodes may be
 * empty, "c" nodes must have content. Any other prefix is ignored.
 *
 * @param mixed  $data      Node content.
 * @param string $format    Format string, e.g. "o_varchar_255".
 * @param array  $attribute Element names identifying the node, used in messages.
 */
function data_check($data,$format,$attribute=null)
{
    global $error, $line_no;

    $format_parts = explode("_", $format);
    $presence     = $format_parts[0];

    if ($presence !== "o" && $presence !== "c") {
        return;
    }

    // Content present: optional and required nodes are checked the same way.
    if (strlen($data) != 0) {
        check_again($format_parts, $data, $attribute);
        return;
    }

    // Empty content is only a problem for required nodes.
    if ($presence === "c") {
        $error .= " Error, Data not present - please fill in the required details for : ".echo_attr($attribute)." for Artikel @ line no ".$line_no.LIMIT;
    }
}
/**
 * Counts the characters (not bytes) of a UTF-8 string.
 *
 * Uses mbstring when available and PCRE otherwise; falls back to the byte
 * length when the text is not valid UTF-8.
 *
 * @param string $text
 * @return int
 */
function check_again_char_length($text)
{
    if (function_exists('mb_strlen')) {
        return mb_strlen($text, 'UTF-8');
    }

    $chars = preg_match_all('/./us', $text, $unused);

    return $chars === false ? strlen($text) : $chars;
}

/**
 * Checks node content against the data type named in its format string and
 * appends a message to the global $error when it does not conform.
 *
 * @param array $arr_format Format string split on "_": [presence, type, max length].
 *                          The type is matched case-insensitively; the max length
 *                          is only read for "varchar".
 * @param mixed $data       Node content (non-empty).
 * @param array $attribute  Element names identifying the node, used in messages.
 */
function check_again($arr_format,$data,$attribute=null)
{
    global $error, $line_no;

    $data    = (string) $data;
    $problem = null; // message prefix of the failed check, if any

    switch (strtolower($arr_format[1])) { // checking of Different Data Types
        case "varchar":
            // The documents are UTF-8, so the limit is applied to characters;
            // a byte count would charge umlauts twice.
            if (check_again_char_length($data) > intval($arr_format[2])) {
                $problem = "Error,length exceeded - z.B.: Heraeus Kulzer - ";
            }
            break;
        case "decimal":
            if (!iis_decimal($data)) {
                $problem = "Data_type(Decimal) Format Incorrect - z.B.: 2.00 - ";
            }
            break;
        case "boolean":
            if (!iss_boolean($data)) {
                $problem = "Data_type(Boolean) Format Incorrect - z.B.: 0 - ";
            }
            break;
        case "date":
            if (!iis_date($data)) {
                $problem = "Incorrect Date Format - z.B.: 2013-03-15 - ";
            }
            break;
        case "url":
            if (!isValidURL($data)) {
                $problem = " URL Format Incorrect - z.B.: bild1.jpg ODER http://www.meinshop.de/bild1.jpg - ";
            }
            break;
        case "int":
            if (!iis_int($data)) {
                $problem = "Data_type(Integer) Format Incorrect - z.B.: 0 - ";
            }
            break;
    }

    if ($problem !== null) {
        $error .= $problem.echo_attr($attribute)." for Artikel @ line no ".$line_no.LIMIT;
    }
}
// Print every validation message collected in $error, starting on a new line.
echo "<br/>", $error;
/**
 * Function to validate the Boolean type values.
 *
 * A value passes when it is exactly one digit long and that digit is 0 or 1.
 *
 * @param mixed $var Value to test.
 * @return bool
 */
function iss_boolean($var)
{
    $is_single_digit = strlen($var) === 1 && iis_int($var);

    return $is_single_digit && ($var == "1" || $var == "0");
}

/**
 * Tells whether a value consists of one or more decimal digits only.
 *
 * @param mixed $var Value to test.
 * @return int|false preg_match() result: 1 on match, 0 otherwise, false on a PCRE error.
 */
function iis_int($var)
{
    // The D modifier makes "$" match only at the very end of the subject;
    // without it a value such as "12\n" would be accepted.
    return preg_match('/^\d+$/D', $var);
}
/*function iis_int($var)
{
if(strval(intval($var)) == strval($var)) {
return true;
}
else return false;
}*/
/**
 * Function to validate the Decimal type values.
 *
 * Accepted forms: "0", "200", "2.00", "0.5" and ".5". The decimal separator
 * is a literal dot; signs, commas, leading zeros and exponents are rejected.
 *
 * @param mixed $number_check Value to test.
 * @return bool
 */
function iis_decimal($number_check)
{
    // The dots are escaped on purpose: an unescaped "." matches any
    // character and would let values such as "1a5" or "2,00" through.
    // D keeps "$" from matching before a trailing newline.
    $pattern = '/^(?:[1-9][0-9]*(?:\.[0-9]+)?|0\.[0-9]+|\.[0-9]+|0)$/D';

    return preg_match($pattern, $number_check) === 1;
}
/**
 * Function to validate URL's.
 *
 * Accepts absolute http/https URLs: a host made of dot-separated labels
 * (letters, digits, hyphens), an optional port and an optional path.
 *
 * @param mixed $url Value to test.
 * @return int|false preg_match() result: 1 on match, 0 otherwise, false on a PCRE error.
 */
function isValidURL($url)
{
    // The label separator must be a literal dot; an unescaped "." would
    // accept hosts such as "a b" or "exa_mple". D anchors "$" at the real end.
    return preg_match('|^https?://[a-z0-9-]+(\.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$|iD', $url);
}
/**
 * Function to validate Date Format.
 *
 * The input is parsed with the given format and written back out with the
 * same format; it is valid only when both texts are identical. This rejects
 * impossible dates such as 2013-02-30 and non-padded forms such as 2013-3-15.
 *
 * @param mixed  $input       Value to test; surrounding whitespace is ignored.
 * @param string $date_format Format understood by DateTime::createFromFormat().
 * @return bool
 */
function iis_date($input,$date_format = 'Y-m-d')
{
    $input = trim((string) $input);
    if ($input === '') {
        return false;
    }

    // createFromFormat() honours $date_format (the parameter is no longer
    // overwritten) and returns false for text that does not fit it.
    $parsed = DateTime::createFromFormat($date_format, $input);
    if ($parsed === false) {
        return false;
    }

    // Out-of-range parts roll over while parsing, so only an exact
    // round trip proves that the date really exists.
    return $parsed->format($date_format) === $input;
}
/**
 * Joins element names into one string, separated by "->".
 *
 * @param array|null $attributes Element names in the order they should appear.
 * @return string The joined names; an empty string when there are none or
 *                when no array was given.
 */
function echo_attr($attributes)
{
    // Callers default their attribute argument to null; count() and foreach
    // do not accept that, so treat anything that is not an array as "no names".
    if (!is_array($attributes)) {
        return "";
    }

    return implode("->", $attributes);
}

?>

-------------------------- < END OF SCRIPT > -----------------------------------------

I also have a sample XML file:

----------------------- < XML SCRIPT > ----------------------------------

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<AlleArtikel>
<Artikel>
<Herstellernummer>D 10</Herstellernummer>
<Herstellername>AD-Arztbedarf</Herstellername>
<Eigenmarke>0</Eigenmarke>
<ID>A_00000103</ID>
<Name>Algisept-Spray D 10 | Algisept-Spray D 10, Sprühflasche 200 ml</Name>
<Beschreibung>Algisept-Spray D 10
Speziell konzipiert für die Desinfektion von Alginat- und Silikon-Abdrücken. Gute Materialverträglichkeit. Praxisgerechte, sparsame Anwendung. Abdrücke einsprühen, feucht halten und nach 5 Min. ausgießen. Nicht geeignet für Hydrokolloide.
Wirkungsspektrum:
Wirksam gegen Bakterien einschließlich Tbc, Pilze, HBV und HIV

Verkaufseinheit:
Algisept-Spray D 10, Sprühflasche 200 ml</Beschreibung>
<Packungseinheit>Sprühflasche</Packungseinheit>
<Anzahl></Anzahl>
<Packungsmenge>200</Packungsmenge>
<Basismengeneinheit>ml</Basismengeneinheit>
<MwSt>0</MwSt>
<EAN></EAN>
<HIBC></HIBC>
<PZN></PZN>
<Bild1URL>http://www.d-rect.de/pictures/gross/76655-01_gross.jpg</Bild1URL>
<Bild2URL></Bild2URL>
<DatenblattURL></DatenblattURL>
<ExtraDateiURL></ExtraDateiURL>
<Groesse></Groesse>
<Geschlecht></Geschlecht>
<Farbe></Farbe>
<Material></Material>
<Kategorien>
<Kategorie>002 Desinfektion / Sterilisation &gt; 002.010 Abformdesinfektion</Kategorie>
</Kategorien>
<ZugehoerigeArtikel>
<ID>A_00000103</ID>
<ID>A_00000104</ID>
</ZugehoerigeArtikel>
<Angebote>
<Angebot>
<Bestellnummer>47 51 72</Bestellnummer>
<GueltigAb></GueltigAb>
<GueltigBis></GueltigBis>
<MinBestellmenge>1</MinBestellmenge>
<Preis>4.15</Preis>
<Rabattfaehig>1</Rabattfaehig>
<Beschaffungsartikel>0</Beschaffungsartikel>
<Lagerstatus>1</Lagerstatus>
</Angebot>
</Angebote>
</Artikel>
</AlleArtikel>


----------------------- < END OF XML SCRIPT > ---------------------------


thanks

Regards

Ash