root/fXML.php

Revision 796, 15.9 kB (checked in by wbond, 4 months ago)

Fixed ticket #405 - updated fXML to create a __ prefix for the default namespace and to implicitly use that for child and attribute access when no prefix is specified

LineHide Line Numbers
1 <?php
2 /**
3  * Provides functionality for XML files
4  *
5  * This class is implemented to use the UTF-8 character encoding. Please see
6  * http://flourishlib.com/docs/UTF-8 for more information.
7  *
8  * @copyright  Copyright (c) 2007-2010 Will Bond
9  * @author     Will Bond [wb] <will@flourishlib.com>
10  * @license    http://flourishlib.com/license
11  *
12  * @package    Flourish
13  * @link       http://flourishlib.com/fXML
14  *
15  * @version    1.0.0b4
16  * @changes    1.0.0b4  Updated the class to automatically add a `__` prefix for the default namespace and to use that for attribute and child element access [wb, 2010-04-06]
17  * @changes    1.0.0b3  Added the `$http_timeout` parameter to ::__construct() [wb, 2009-09-16]
18  * @changes    1.0.0b2  Added instance functionality for reading of XML files [wb, 2009-09-01]
19  * @changes    1.0.0b   The initial implementation [wb, 2008-01-13]
20  */
class fXML implements ArrayAccess
{
    // The following constants allow for nice looking callbacks to static methods
    const encode     = 'fXML::encode';
    const sendHeader = 'fXML::sendHeader';
    
    
    /**
     * Encodes content for display in a UTF-8 encoded XML document
     *
     * Any entities already present in `$content` are decoded before the
     * content is encoded, so previously-encoded content is not double-encoded.
     *
     * @param  string $content  The content to encode
     * @return string  The encoded content
     */
    static public function encode($content)
    {
        $decoded = html_entity_decode($content, ENT_QUOTES, 'UTF-8');
        return htmlspecialchars($decoded, ENT_QUOTES, 'UTF-8');
    }
    
    
    /**
     * Sets the proper `Content-Type` HTTP header for a UTF-8 XML file
     *
     * @return void
     */
    static public function sendHeader()
    {
        header('Content-Type: text/xml; charset=utf-8');
    }
    
    
    /**
     * Custom prefix => namespace URI mappings
     *
     * @var array
     */
    protected $__custom_prefixes;
    
    /**
     * The dom element for this XML
     *
     * @var DOMElement
     */
    protected $__dom;
    
    /**
     * An XPath object for performing xpath lookups
     *
     * @var DOMXPath
     */
    protected $__xpath;
    
    /**
     * The XML string for serialization
     *
     * @var string
     */
    protected $__xml;
    
    
    /**
     * Create the XML object from a string, fFile or URL
     *
     * If the root element is in a default (unprefixed) namespace, that
     * namespace is registered under the custom prefix `__`, which is then
     * used for child element access when a name does not include a `:`.
     *
     * @throws fValidationException    When the source XML is invalid or does not exist
     *
     * @param  fFile|string  $source        The source of the XML, either an fFile object, a string of XML, a file path or a URL
     * @param  numeric       $http_timeout  The timeout to use in seconds when requesting an XML file from a URL
     * @return fXML
     */
    public function __construct($source, $http_timeout=NULL)
    {
        // Prevent spitting out errors so we can throw exceptions
        $old_setting = libxml_use_internal_errors(TRUE);
        
        $exception_message = NULL;
        try {
            // This is used internally by ::xpath() to wrap matched elements
            if ($source instanceof DOMElement) {
                $this->__dom = $source;
                $xml         = TRUE;
                
            } elseif ($source instanceof fFile) {
                $xml = simplexml_load_file($source->getPath());
                
            // This handles URLs specially by adding a reasonable timeout
            } elseif (preg_match('#^(?P<protocol>http(s)?)://#', $source, $matches)) {
                
                if ($http_timeout === NULL) {
                    $http_timeout = ini_get('default_socket_timeout');
                }
                
                // PHP documents the `http` context options as applying to both
                // http:// and https:// requests, so the timeout is always set
                // there. The protocol-specific key used by earlier revisions
                // is kept as well.
                $wrapper_options = array('timeout' => $http_timeout);
                $context = stream_context_create(array(
                    'http'               => $wrapper_options,
                    $matches['protocol'] => $wrapper_options
                ));
                
                // If the URL is not loaded in time, this suppresses the file_get_contents() warning
                $old_level = error_reporting(error_reporting() & ~E_WARNING);
                $xml = file_get_contents($source, FALSE, $context);
                error_reporting($old_level);
                
                if (!$xml) {
                    throw new fExpectedException('The URL specified, %s, could not be loaded', $source);
                }
                
                $xml = new SimpleXMLElement($xml);
                
            } else {
                // Anything that does not start with a tag is treated as a file path
                $is_path = $source && !preg_match('#^\s*<#', $source);
                $xml     = new SimpleXMLElement($source, 0, $is_path);
            }
        
        } catch (Exception $e) {
            $exception_message = $e->getMessage();
            $xml = FALSE;
        }
        
        // We want it to be clear when XML parsing issues occur
        if ($xml === FALSE) {
            // The errors have to be read before the old setting is restored,
            // since turning internal errors off discards them
            $exception_message .= $this->collectLibxmlErrors();
            libxml_use_internal_errors($old_setting);
            throw new fValidationException(str_replace('%', '%%', $exception_message));
        }
        
        libxml_use_internal_errors($old_setting);
        
        if (!$this->__dom) {
            $this->__dom = dom_import_simplexml($xml);
        }
        
        if ($this->__dom->namespaceURI && $this->__dom->prefix == '') {
            $this->addCustomPrefix('__', $this->__dom->namespaceURI);
        }
    }
    
    
    /**
     * Allows access to the text content of a child tag
     *
     * The child element name (`$name`) may start with a namespace prefix and a
     * `:` to indicate what namespace it is part of. When the current element
     * is in a default (unprefixed) namespace and `$name` does not contain a
     * `:`, the `__` prefix for the default namespace is used automatically.
     *
     * If `$name` is the name of one of the methods of this class, a callback
     * array for that method is returned instead.
     *
     * @internal
     *
     * @param  string $name  The child element to retrieve
     * @return string|array|NULL  The text content of the first matching child element, a method callback, or `NULL` if no child matches
     */
    public function __get($name)
    {
        // Handle nice callback syntax
        static $methods = array(
            '__construct'     => TRUE,
            '__get'           => TRUE,
            '__isset'         => TRUE,
            '__sleep'         => TRUE,
            '__toString'      => TRUE,
            '__wakeup'        => TRUE,
            'addCustomPrefix' => TRUE,
            'getName'         => TRUE,
            'getNamespace'    => TRUE,
            'getPrefix'       => TRUE,
            'getText'         => TRUE,
            'offsetExists'    => TRUE,
            'offsetGet'       => TRUE,
            'offsetSet'       => TRUE,
            'offsetUnset'     => TRUE,
            'toXML'           => TRUE,
            'xpath'           => TRUE
        );
        
        if (isset($methods[$name])) {
            return array($this, $name);
        }
        
        $first_child = $this->query($this->applyDefaultPrefix($name) . '[1]');
        if ($first_child->length) {
            return $first_child->item(0)->textContent;
        }
        
        return NULL;
    }
    
    
    /**
     * Checks if a child element exists
     *
     * The child element name (`$name`) may start with a namespace prefix and a
     * `:` to indicate what namespace it is part of. When the current element
     * is in a default (unprefixed) namespace and `$name` does not contain a
     * `:`, the `__` prefix for the default namespace is used automatically.
     *
     * @internal
     *
     * @param  string $name  The child element to check - see method description for details about namespaces
     * @return boolean  If the child element is set
     */
    public function __isset($name)
    {
        return (boolean) $this->query($this->applyDefaultPrefix($name) . '[1]')->length;
    }
    
    
    /**
     * Prevents users from trying to set elements
     *
     * @internal
     *
     * @throws fProgrammerException  Always, since this class is read-only
     *
     * @param  string $name   The element to set
     * @param  mixed  $value  The value to set
     * @return void
     */
    public function __set($name, $value)
    {
        throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
    }
    
    
    /**
     * The XML needs to be made into a string before being serialized
     *
     * @internal
     *
     * @return array  The members to serialize
     */
    public function __sleep()
    {
        $this->__xml = $this->toXML();
        return array('__custom_prefixes', '__xml');
    }
    
    
    /**
     * Gets the string inside the root XML element
     *
     * @return string  The text inside the root element
     */
    public function __toString()
    {
        return $this->getText();
    }
    
    
    /**
     * Prevents users from trying to unset elements
     *
     * @internal
     *
     * @throws fProgrammerException  Always, since this class is read-only
     *
     * @param  string $name  The element to unset
     * @return void
     */
    public function __unset($name)
    {
        throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
    }
    
    
    /**
     * The XML needs to be made into a DOMElement when woken up
     *
     * @internal
     *
     * @return void
     */
    public function __wakeup()
    {
        $this->__dom = dom_import_simplexml(new SimpleXMLElement($this->__xml));
        $this->__xml = NULL;
    }
    
    
    /**
     * Adds a custom namespace prefix to full namespace mapping
     *
     * This namespace prefix will be valid for any operation on this object,
     * including calls to ::xpath().
     *
     * @param  string $ns_prefix  The custom namespace prefix
     * @param  string $namespace  The full namespace it maps to
     * @return void
     */
    public function addCustomPrefix($ns_prefix, $namespace)
    {
        if (!$this->__custom_prefixes) {
            $this->__custom_prefixes = array();
        }
        $this->__custom_prefixes[$ns_prefix] = $namespace;
        
        // If the XPath object has not been created yet, ::query() will
        // register all of the custom prefixes when it creates it
        if ($this->__xpath) {
            $this->__xpath->registerNamespace($ns_prefix, $namespace);
        }
    }
    
    
    /**
     * Adds the `__` default namespace prefix to an element name when needed
     *
     * The prefix is only added when the current element is in a default
     * (unprefixed) namespace and `$name` does not already contain a `:`.
     *
     * @param  string $name  The element name to prefix
     * @return string  The element name, possibly prefixed with `__:`
     */
    protected function applyDefaultPrefix($name)
    {
        $in_default_namespace = $this->__dom->namespaceURI && $this->__dom->prefix == '';
        if ($in_default_namespace && strpos($name, ':') === FALSE) {
            return '__:' . $name;
        }
        return $name;
    }
    
    
    /**
     * Joins the messages of all buffered libxml errors, each preceded by a newline
     *
     * @return string  The combined error messages, or an empty string if there are none
     */
    protected function collectLibxmlErrors()
    {
        $message = '';
        foreach (libxml_get_errors() as $error) {
            $message .= "\n" . $error->message;
        }
        return $message;
    }
    
    
    /**
     * Returns the name of the current element
     *
     * @return string  The name of the current element, without any namespace prefix
     */
    public function getName()
    {
        return $this->__dom->localName;
    }
    
    
    /**
     * Returns the namespace of the current element
     *
     * @return string  The namespace of the current element
     */
    public function getNamespace()
    {
        return $this->__dom->namespaceURI;
    }
    
    
    /**
     * Returns the namespace prefix of the current element
     *
     * @return string  The namespace prefix of the current element
     */
    public function getPrefix()
    {
        return $this->__dom->prefix;
    }
    
    
    /**
     * Returns the string text of the current element
     *
     * @return string  The string text of the current element
     */
    public function getText()
    {
        return (string) $this->__dom->textContent;
    }
    
    
    /**
     * Provides functionality for isset() and empty() (required by ArrayAccess interface)
     *
     * Offsets refer to an attribute name. The name is used as-is in an XPath
     * attribute lookup, so it may start with a namespace prefix and a `:` as
     * long as the prefix can be resolved by the query, such as one added via
     * ::addCustomPrefix().
     *
     * @internal
     *
     * @param  string $offset  The offset to check
     * @return boolean  If the offset exists
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return (boolean) $this->query('@' . $offset . '[1]')->length;
    }
    
    
    /**
     * Provides functionality for get [index] syntax (required by ArrayAccess interface)
     *
     * Offsets refer to an attribute name. The name is used as-is in an XPath
     * attribute lookup, so it may start with a namespace prefix and a `:` as
     * long as the prefix can be resolved by the query, such as one added via
     * ::addCustomPrefix().
     *
     * @internal
     *
     * @param  string $offset  The attribute to retrieve the value for
     * @return string|NULL  The value of the attribute, or `NULL` if it does not exist
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        $attribute = $this->query('@' . $offset . '[1]');
        if ($attribute->length) {
            return $attribute->item(0)->nodeValue;
        }
        return NULL;
    }
    
    
    /**
     * Required by ArrayAccess interface
     *
     * @internal
     *
     * @throws fProgrammerException  Always, since this class is read-only
     *
     * @param  integer|string $offset  The offset to set
     * @param  mixed          $value   The value to set
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
    }
    
    
    /**
     * Required by ArrayAccess interface
     *
     * @internal
     *
     * @throws fProgrammerException  Always, since this class is read-only
     *
     * @param  integer|string $offset  The offset to unset
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        throw new fProgrammerException('The %s class does not support modifying XML', __CLASS__);
    }
    
    
    /**
     * Performs an XPath query on the current element, returning the raw results
     *
     * The XPath object is created the first time this method is called and
     * all custom prefixes are registered with it at that point.
     *
     * @throws fProgrammerException  When the XPath query fails
     *
     * @param  string $path  The XPath path to query
     * @return DOMNodeList  The matching nodes
     */
    protected function query($path)
    {
        if (!$this->__xpath) {
            $this->__xpath = new DOMXPath($this->__dom->ownerDocument);
            if ($this->__custom_prefixes) {
                foreach ($this->__custom_prefixes as $prefix => $namespace) {
                    $this->__xpath->registerNamespace($prefix, $namespace);
                }
            }
        }
        
        // Prevent spitting out errors so we can throw exceptions
        $old_setting = libxml_use_internal_errors(TRUE);
        
        // The current element is the context node, so relative paths start from it
        $result = $this->__xpath->query($path, $this->__dom);
        
        // We want it to be clear when XPath issues occur
        if ($result === FALSE) {
            // The errors have to be read before the old setting is restored,
            // since turning internal errors off discards them
            $exception_message = $this->collectLibxmlErrors();
            libxml_use_internal_errors($old_setting);
            throw new fProgrammerException(str_replace('%', '%%', trim($exception_message)));
        }
        
        libxml_use_internal_errors($old_setting);
        
        return $result;
    }
    
    
    /**
     * Returns a well-formed XML string from the current element
     *
     * When the current element is the root element, the whole document is
     * returned, otherwise just the current element is returned.
     *
     * @return string  The XML
     */
    public function toXML()
    {
        $document = $this->__dom->ownerDocument;
        $is_root  = $this->__dom->parentNode === $document;
        
        return $document->saveXML($is_root ? $this->__dom->parentNode : $this->__dom);
    }
    
    
    /**
     * Executes an XPath query on the current element, returning an array of matching elements
     *
     * Matched elements are returned as fXML objects that inherit the custom
     * prefixes of this object, and matched text nodes are returned as strings.
     * Matched attributes are returned as strings keyed by the attribute name
     * (including any prefix). If more than one attribute with the same name
     * is matched, the keys are `name[1]`, `name[2]`, etc. instead.
     *
     * @throws fProgrammerException  When the XPath query fails
     *
     * @param  string  $path        The XPath path to query
     * @param  boolean $first_only  If only the first match should be returned
     * @return array|string|fXML|NULL  An array of matches, or if `$first_only` is `TRUE`, a string or fXML object for the first match, `NULL` if there were no matches, or `FALSE` if the first match is a kind of node that is not converted
     */
    public function xpath($path, $first_only=FALSE)
    {
        $result = $this->query($path);
        
        if (!$result->length) {
            return $first_only ? NULL : array();
        }
        
        $nodes = $first_only ? array($result->item(0)) : $result;
        
        $keys_to_remove = array();
        $output         = array();
        
        foreach ($nodes as $element) {
            
            if ($element instanceof DOMElement) {
                $child = new fXML($element);
                $child->__custom_prefixes = $this->__custom_prefixes;
                // Copying the prefixes replaces whatever the constructor set
                // up, so the default namespace prefix has to be added again
                if ($child->__dom->namespaceURI && $child->__dom->prefix == '') {
                    $child->addCustomPrefix('__', $child->__dom->namespaceURI);
                }
                $output[] = $child;
            
            } elseif ($element instanceof DOMCharacterData) {
                $output[] = $element->data;
            
            } elseif ($element instanceof DOMAttr) {
                
                $key = $element->name;
                if ($element->prefix) {
                    $key = $element->prefix . ':' . $key;
                }
                
                // We will create an attrname and attrname[1] key for each
                // attribute and if more than one is found we remove the
                // key attrname. If only one is found we remove attrname[1].
                $key_1 = $key . '[1]';
                
                if (isset($output[$key_1])) {
                    // Find the first unused index for this attribute name
                    $i = 1;
                    while (isset($output[$key . '[' . $i . ']'])) {
                        $i++;
                    }
                    
                    // This removes the key without the array index if more than one was found
                    unset($output[$key]);
                    unset($keys_to_remove[$key_1]);
                    
                    $key = $key . '[' . $i . ']';
                
                } else {
                    $output[$key_1] = $element->nodeValue;
                    $keys_to_remove[$key_1] = TRUE;
                }
                
                $output[$key] = $element->nodeValue;
            }
        }
        
        foreach ($keys_to_remove as $key => $trash) {
            unset($output[$key]);
        }
        
        if ($first_only) {
            // reset() returns the first remaining value, or FALSE when empty
            return reset($output);
        }
        
        return $output;
    }
}
580  
581  
582  
583 /**
584  * Copyright (c) 2007-2010 Will Bond <will@flourishlib.com>
585  *
586  * Permission is hereby granted, free of charge, to any person obtaining a copy
587  * of this software and associated documentation files (the "Software"), to deal
588  * in the Software without restriction, including without limitation the rights
589  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
590  * copies of the Software, and to permit persons to whom the Software is
591  * furnished to do so, subject to the following conditions:
592  *
593  * The above copyright notice and this permission notice shall be included in
594  * all copies or substantial portions of the Software.
595  *
596  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
597  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
598  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
599  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
600  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
601  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
602  * THE SOFTWARE.
603  */