perl learning : xml parse & xpath simple implementation



package   MYXML;
require      Exporter;
our @ISA       =qw(Exporter);
our $VERSION   = 1.00;         #version
our @EXPORT    =qw(MyXMLSimple);
our @EXPORT    =qw(GetNodeByPath);

###############################my xml simple protected sub##############
#get node tag of a nodestring like <%TAG% ... >
#in param: the nodestring
#return value: the tag : %TAG%
sub GetNodeTag
{
     my ($NodeString) = @_;
     $NodeString =~ m/</s*//(/w+)/s*>/ or $NodeString =~ m/</s*(/w+).*?>/;
     return $1;
}
#get the attributes of a node that description by a string like <TAG attr1="xxx" attr2="xxx" ... >
#in param1: the nodestring
#in param2: a array to store the names and values of the attributes
#in the array: name1, value1, name2, value2, name3, value3,...
sub GetAtribList
{
     my ($nodestring, $atribarray) = @_;
     @$atribarray = $nodestring =~ m/(/w+)/s*=/s*[/"'](.*?)[/"']/g;
}
#push the attributes of a node into a hash that reprent the node
#in param1: the nodestring
#in param2: the hash that will reprent the node
sub pushattributes
{
  my ($nodestring, $hash) = @_;
  my @tsarray = ();
  GetAtribList($nodestring, /@tsarray);
  if (@tsarray)
  {
    for (my $j = 0; $j < @tsarray;)
    {
      my $key = $tsarray[$j];
      my $value = $tsarray[$j+1];
      $value =~ s/&quot;//"/g;
      $$hash{$key} = $value;
      $j += 2;
    }
  }
}
#push a couple of key and value to a hash
#in param1: the key
#in param2: the value
#in param3: the hash
sub pushhash
{
  my ($key, $value, $hash) = @_;
  if ($hash =~ /^HASH/)
  {
    if (exists
hash{$key})     {       my $ekvalue =
hash{$key};
      if ($ekvalue =~ /^ARRAY/)
      {
      my @array=@{$ekvalue};
     my $index = @array;
       
hash{$key}[$index] = $value;       }       else       {       my @tparray = ();       push (@tparray, $ekvalue);        push (@tparray, $value);
hash{$key}=[@tparray];
      }
    }
    else
    {
      $$hash{$key} = $value;
    }
  }
} #generate a xml dom hash by a nodearray that contained the nodestring of the xml file
#in param1: the nodearray that contained all the nodestring in the xml file <...>
#in param2: the begin pos to generate the xml dom hash in the nodearray
#in param3: the end pos to generate the xml dom hash in the nodearray
#in param4: the hash
sub GenerateXMLDom
{
   my ($nodearray, $begin, $length, $root)=@_;
   for (my $i=$begin; $i<$begin + $length; )
   {
      my $temp = $$nodearray[$i];
      my ($nodetag) = GetNodeTag($temp);
      if ($temp =~ m////s*>/) #single node
      {
         my %tshash1 = ();
         pushattributes($temp, /%tshash1);
         pushhash($nodetag, /%tshash1, $root);
         $i++;
         next;
      }
      elsif ($temp =~ m/</s*///) #end of complex node
      {
         $i++;
         next;
      }
      else  #complex node
      {
         my %tshash2 = ();
         pushattributes($temp, /%tshash2);
         my $headnum = 1;
         my $endnum = 0;
         my $start = $i;
         while ($endnum != $headnum && $i < $begin + $length-1)
         {
            $i++;
            my $nextnode = $$nodearray[$i];
            if ($nextnode =~ m////s*>/) #single node
            {
               $headnum++;
               $endnum++;
            }
            elsif ($nextnode =~ m/</s*///) #end of complex node
            {
               $endnum++;
            }
            else
            {
               $headnum++;
            }
         }
         $i++;
         my $len = $i - $start;
         GenerateXMLDom($nodearray, $start+1, $len-2, /%tshash2);
         pushhash($nodetag, /%tshash2, $root);
         next;
      }
   }
}
###############################end of my xml simple protected sub##############
################################my xml simple public sub#####################
#my xml simple, to read a xml file into a hash
#in param: the xml file to read
#return value: the hash that contained the xml dom content
sub MyXMLSimple
{
  my ($xmlfile) = @_;
  open (INFILE, "$xmlfile") or die "can't open $xmlfile for reading $!";
  my $filecontent = "";
  while (my $line=<INFILE>)
  {
     chomp $line;
     $filecontent.=$line;
  }
  close INFILE;
  #delete xml header
  $filecontent =~ s/</?.*?/?>//;
  #delete xml comment
  $filecontent =~ s/<!--.*?-->//g;
  #cut file context by <...>
  my @nodearray = $filecontent =~ m/<.*?>/g;
  my $length = @nodearray;
  my %rootnode = ();  #the root node of the xml file
  #construct the xml dom struct
  GenerateXMLDom(/@nodearray, 0, $length, /%rootnode);
  return %rootnode;
}
########################end of my xml simple#############################
###########################xpath protected sub###########################
#use to search the node by attributes, the node tag has already matched
#in param1:search xpath suchas TAGNAME[@attributename="attributevalue" and @attributename="attributevalue" or @attributename="attributevalue" ...]
#in param2:a hash with the key of TAGNAME
#in param3:a array to store the find node which was reprented by a hash
#return value: 1 means has found the node match the attributes or 0 means not found
sub SearchNodeByAttribute
{
    my ($TagString, $hash, $result) = @_;
    #get the attribute name list
    #the element in the array is such as : name, =|!=, value, name, =|!=, value,...
    my @attributes = $TagString =~ m//@(/w+)/s*(!?=)/s*[/"|'](.*?)[/"|']/g;
    #get the logical relations between attributes if existed more then one attributes
    my $index = index($TagString, "[");
    my $endex = index($TagString, "]");
    my $attrstr = "";
    if ($index != -1 && $endex != -1)
    {
      my $len = $endex - $index - 1;
      $attrstr = substr($TagString, $index+1, $len);
    }
    #the logical relations list
    #a sample : and,or,and,or
    my (@atrirelation) = $attrstr =~ m//@/w+/s*!?=/s*[/"|'].*?[/"|']/s*(and|or)/s*/g;
    #if do not give the attributes, then found the node
    if (@attributes <= 0)
    {
        push (@$result, $hash);
        return 1;
    }
    #compare the existed attributes with the given attributes and store the result to a array
    my @tparray = ();
    for (my $i=0; $i < @attributes;)
    {
        my $attname = $attributes[$i];
        my $oper = $attributes[$i+1];
        my $attvalue = $attributes[$i+2];
        #$attvalue =~ tr/(&quot)//"/;
        my $exvalue = $$hash{$attname};
        my $b = 0;
        if ($oper =~ //s*=/s*/)
        {
            if ($exvalue =~ m/$attvalue/)
            {
               $b = 1;
            }
        }
        else
        {
            if ($exvalue =~ m/$attvalue/)
            {
                $b = 0;
            }
            else
            {
                $b = 1;
            }
        }
        push(@tparray, $b);
        $i += 3;
     }
     #use the logical relations beweent attributes to combine the compare results
     my $tpret = $tparray[0];
     for (my $j = 0; $j < @atrirelation; $j++)
     {
        if ($atrirelation[$j] =~ //s*and/s*/)
        {
            $tpret = $tpret && $tparray[$j+1];
        }
        else
        {
            $tpret = $tpret || $tparray[$j+1];
        }
     }
     #if the combined result is 1, then found it
     if ($tpret)
     {
        push(@$result, $hash);
     }
     return $tpret;
}
#use to search a node by the give tag and resultes from its parent node hash
#in param1:search xpath suchas TAGNAME[@attributename="attributevalue" and @attributename="attributevalue" or @attributename="attributevalue" ...]
#in param2:a hash that is the parent node
#in param3:a array to store the find node which was reprented by a hash
#return value: 1 means has found the node match the attributes or 0 means not found
sub SearchNode
{
    my ($TagString, $hash, $result) = @_;
    if (length($TagString) == 0)
    {
        return 0;
    }
    #get the node tag
    $TagString =~ m/(/w+)/;
    my $Tag = $1;
    if ($hash =~ /^HASH/)  # must be a hash
    {
      if (exists
hash{$Tag})  # in the hash existed a key value match the node tag, means there is existed a child node in the hash that has the tag of $Tag       {         my $value =
hash{$Tag};   #get the existed child node hash or array
        if ($value =~ /^ARRAY/)     #there is existed more then on hash node that has the node tag of $Tag
        {
            my @values = @{$value};
            my $ret = 0;
            for (my $k=0; $k < @values; $k++)
            {
              #match each node in the array to find the node
              my ($b) = SearchNodeByAttribute($TagString, $values[$k], $result);
              if ($b)
              {
                $ret = 1;
              }
            }             return $ret;
        }
        else
        {
            #compare the given attributes to find the node
            return SearchNodeByAttribute($TagString, $value, $result);
        }
      }
      else
      {
          return 0;
      }
    }
    else
    {
       return 0;
    }
}
#the $hash as the root hash, and the $xpath was givened from the root, then to search the node by given xpath
#in param2:a hash that is the parent node
#in param3:a array to store the find node which was reprented by a hash
#return value: 1 means has found the node match the attributes or 0 means not found
sub SearchFromRoot
{
    my ($xpath, $hash, $result) = @_;
    if ($xpath =~ m/^///)
    {
        $xpath = substr($xpath, 1);
    }
    if (length($xpath) == 0)
    {
        return 0;
    }
    #get the first node tag and attributes such as TAGNAME[@attributename="attributevalue" and ... or ...]
    my ($tag) = $xpath =~ m/(/s*/w+/s*(/[(/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*(and|or)/s*)*/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*/])?/s*)/;
    #remove the first node tag then for the second recure search
    $xpath =~ s//s*/w+/s*(/[(/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*(and|or)/s*)*/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*/])?/s*//;
    my @nodelist = ();
    my $ret = 0;
    #first find the hash node that match the first node tag and attributes that givened
    $ret = SearchNode($tag, $hash, /@nodelist);
    #if do not found, then return 0
    if ($ret == 0)
    {
              return 0;
    }
    else
    {
        #found the first node tag and attributes node
        if (length($xpath) == 0)
        {
                  #recure search end, then store the results
                  for (my $i=0; $i < @nodelist; $i++)
                  {
                      push(@$result, $nodelist[$i]);
                  }
                  return 1;
        }
        else
        {
                  my $ret = 0;
                  for (my $i=0; $i < @nodelist; $i++)
                  {
                      #then search the second node tag and attributes
                      my ($b) = GetNodeByPath($xpath, $nodelist[$i], $result);
                      if ($b)
                      {
                        $ret = 1;
                      }
                  }
                  return $ret;
        }
    }
}
#used in recure search xpath sub.
#call the recure search sub in each element in the array
#in param1:a xpath
#in param2:a hash that is the root node
#in param3:a array to store the find node which was reprented by a hash
#return value: 1 means has found the node match the attributes or 0 means not found
sub RecureSearchArrayNode
{
    my ($xpath, $value, $result) = @_;
    if ($value =~ /^HASH/)
    {
        return RecureSearchNode($xpath, $value, $result);
    }
    elsif ($value =~ /^ARRAY/)
    {
            my @tparray = @{$value};
            my $ret = 0;
            for (my $k=0; $k < @tparray; $k++)
            {
              my ($b) = RecureSearchArrayNode($xpath, $tparray[$k], $result);
              if ($b)
              {
                $ret = 1;
              }
            }
            return $ret;
    }
    else
    {
        return 0;
    }
}
#used for the recure search when the xpath was givened like : //TAG/TAG[...]/...
#thought that the xpath is givened from the root node hash, and each hash node in the xml hash dom can be the root node, then do the recure search
#in param1:a xpath
#in param2:a hash that is the root node
#in param3:a array to store the find node which was reprented by a hash
#return value: 1 means has found the node match the attributes or 0 means not found
sub RecureSearchNode
{
    my ($xpath, $hash, $result) = @_;
    my $ret = 0;
    #first search the node from the given hash node
    $ret = SearchFromRoot($xpath, $hash, $result);
    #then recure the each element in the current hash node
    #while (($key, $value) = each(%$hash))
    for my $value (values %$hash)
    {
        if ($value =~ /^HASH/)
        {
            my ($b) = RecureSearchNode($xpath, $value, $result);
            if ($b)
            {
              $ret = 1;
            }
        }
        elsif ($value =~ /^ARRAY/)
        {
            my @tparray = @{$value};
            my $tpret = 0;
            for (my $k=0; $k < @tparray; $k++)
            {
              if ($tparray[$k] =~ /^HASH/)
              {
                  my ($b) = RecureSearchNode($xpath, $tparray[$k], $result);
                  if ($b)
                  {
                      $tpret = 1;
                  }
              }
              elsif ($tparray[$k] =~ /^ARRAY/)
              {
                  my ($b) = RecureSearchArrayNode($xpath, $tparray[$k], $result);
                  if ($b)
                  {
                    $tpret = 1;
                  }
              }
              else
              {
                  next;
              }
            }
            if ($tpret)
            {
               $ret = 1;
            }
        }
        else
        {
            next;
        }
    }
    return $ret;
}
#used search hash node by a string that flow xpath syntax, the xpath is a single path
#in param1:a xpath
#in param2:a hash that is the root node of a xml dom struct
#in param3:a array to store the find node which was reprented by a hash
#return value: 1 means has found the node match the attributes or 0 means not found
sub SearchPathNode
{
    my ($xpath, $hash, $result) = @_;
    if (length($xpath) != 0)
    {
        if ($xpath =~ m/^/////) #recure search
        {
            $xpath = substr($xpath, 2);
            return RecureSearchNode($xpath, $hash, $result);
        }
        else  #search from root
        {
            return SearchFromRoot($xpath, $hash, $result);
        }
    }
    else
    {
        return 0;
    }
}
#used for Find the parent hash node of a given Hash node under a given hash key value
#in param1: the hash node to find
#in param2: a value of a key in the hash
#in param3: a array to store the found parent hash node
#return value: 1 means found the parent hash node, or 0 means not found
sub FindHashNode
{
    my ($curhashnode, $value, $result) = @_;
    if ($value =~ /^HASH/)  #if a hash then call GetHashParentNode
    {
        return GetHashParentNode($curhashnode, $value, $result);
    }
    elsif ($value =~ /^ARRAY/)  #if a array then deal with each element in the array
    {
        my @tparray = @{$value};
        foreach my $kv (@tparray)
        {
           my $b = FindHashNode($curhashnode, $kv, $result);
           if ($b)
           {
              return 1;
           }
        }
        return 0;
    }
    else   #maybe a attribute value key then return 0
    {
        return 0;
    }
}
###########################end of xpath protected sub###########################
########################################public xpath sub########################
#used search hash node by a string that flow xpath syntax, the xpath is a complex path that combine by more then one xpath
#in param1:a xpath
#in param2:a hash that is the root node of a xml dom struct
#in param3:a array to store the find node which was reprented by a hash
#return value: 1 means has found the node match the attributes or 0 means not found
sub GetNodeByPath
{
    my ($xpathlist, $hash, $result) = @_;
    if ($xpathlist =~ m/(((////|//)?/s*/w+/s*(/[(/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*(and|or)/s*)*/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*/])?/s*///s*)*(////|//)?/s*/w+/s*(/[(/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*(and|or)/s*)*/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*/])?/s*/|/s*)*((////|//)?/s*/w+/s*(/[(/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*(and|or)/s*)*/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*/])?/s*///s*)*(////|//)?/s*/w+/s*(/[(/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*(and|or)/s*)*/s*/@/w+/s*=/s*[/"|']/w+[/"|']/s*/])?/s*/)
    {
        my @xpatharray = split(//|/,$xpathlist);
        my $ret = 0;
        foreach my $xpath (@xpatharray)
        {
            if (length($xpath) == 0)
            {
                next;
            }
            my ($b) = SearchPathNode($xpath, $hash, $result);
            if ($b)
            {
              $ret = 1;
            }
        }
        return $ret;
    }
    else
    {
        print "the xpath $xpathlist does not match the syntax of xpath!/n";
        return;
    }
}
#use for get parent hash node
#in param1: current node hash
#in param2: root node hash
#in param3: a array to store parent hash node
#return value: 1 means success to get the parent node or 0 means failed to get parent node
sub GetHashParentNode
{
    my ($curhashnode, $roothashnode, $result) = @_;
    #print "curent hash node is $curhashnode /n"  ;
    #print "roothashnode is $roothashnode /n";
    if ($curhashnode == $roothashnode)
    {
        return 0;
    }
    for my $value (values %$roothashnode) # (my ($key, $value) = each(%$roothashnode))
    {
         #print "curent hash node is $curhashnode /n" ;
         #print "keyvalue is $value /n"  ;
        if ($value == $curhashnode)
        {
            %$result = %{$roothashnode};
            return 1;
        }
        if ($value =~ /^HASH/)
        {
            my $b = GetHashParentNode($curhashnode, $value, $result);
            if ($b)
            {
                return 1;
            }
        }
        elsif ($value =~ /^ARRAY/)
        {
            my @tparray = @{$value};
            foreach my $kt (@tparray)
            {
                if ($kt == $curhashnode)
                {
                    %$result = %{$roothashnode};
                    return 1;
                }
                my $c = FindHashNode($curhashnode, $kt, $result);
                if ($c)
                {
                    return 1;
                }
            }
        }
        else
        {
            next;
        }
    }
    return 0;
}
###########################end of my xpath#########################################

Comments

Popular posts from this blog

How to fix error : no module named sendgrid when try to use sendgrid python lib in PHP.

react-native run-android : sun.security.provider.cert path.SunCertPathBuilderException : unable to find valid certification path to req uested target

react-native run-android : do not build/update modified code(App.js)