raw
mp-wp_genesis           1 <?php
mp-wp_genesis 2 /**
mp-wp_genesis 3 * Atom Syndication Format PHP Library
mp-wp_genesis 4 *
mp-wp_genesis 5 * @package AtomLib
mp-wp_genesis 6 * @link http://code.google.com/p/phpatomlib/
mp-wp_genesis 7 *
mp-wp_genesis 8 * @author Elias Torres <elias@torrez.us>
mp-wp_genesis 9 * @version 0.4
mp-wp_genesis 10 * @since 2.3
mp-wp_genesis 11 */
mp-wp_genesis 12
mp-wp_genesis 13 /**
mp-wp_genesis 14 * Structure that stores common Atom Feed Properties
mp-wp_genesis 15 *
mp-wp_genesis 16 * @package AtomLib
mp-wp_genesis 17 */
#[AllowDynamicProperties]
class AtomFeed {
    /*
     * Besides the collections declared below, AtomParser attaches further
     * fields to this object at parse time by assigning `$object->$tag` for
     * every element named in its $ATOM_CONTENT_ELEMENTS and
     * $ATOM_SIMPLE_ELEMENTS lists (title, subtitle, rights, id, updated, ...).
     * Those fields are created dynamically, which PHP 8.2+ reports as a
     * deprecation unless the class opts in through the attribute above.
     * On older PHP versions the attribute line is parsed as a `#` comment.
     */

    /**
     * Stores Links
     *
     * One attribute map per <link> element, in document order.
     *
     * @var array
     * @access public
     */
    public $links = array();

    /**
     * Stores Categories
     *
     * One attribute map per <category> element, in document order.
     *
     * @var array
     * @access public
     */
    public $categories = array();

    /**
     * Stores Entries
     *
     * AtomEntry objects appended by the parser as each entry element closes.
     *
     * @var array
     * @access public
     */
    public $entries = array();
}
mp-wp_genesis 39
mp-wp_genesis 40 /**
mp-wp_genesis 41 * Structure that stores Atom Entry Properties
mp-wp_genesis 42 *
mp-wp_genesis 43 * @package AtomLib
mp-wp_genesis 44 */
#[AllowDynamicProperties]
class AtomEntry {
    /*
     * AtomParser adds further fields to an entry while parsing by assigning
     * `$object->$tag` for each element listed in its $ATOM_CONTENT_ELEMENTS
     * and $ATOM_SIMPLE_ELEMENTS arrays (content, summary, title, id,
     * updated, published, draft, ...). They are dynamic properties, so the
     * attribute above is required to keep PHP 8.2+ from raising a
     * deprecation; earlier PHP versions read that line as a `#` comment.
     */

    /**
     * Stores Links
     *
     * One attribute map per <link> element, in document order.
     *
     * @var array
     * @access public
     */
    public $links = array();

    /**
     * Stores Categories
     *
     * One attribute map per <category> element, in document order.
     *
     * @var array
     * @access public
     */
    public $categories = array();
}
mp-wp_genesis 59
mp-wp_genesis 60 /**
mp-wp_genesis 61 * AtomLib Atom Parser API
mp-wp_genesis 62 *
mp-wp_genesis 63 * @package AtomLib
mp-wp_genesis 64 */
class AtomParser {

    /**
     * Atom namespace URI; element names are matched as "<NS>:<local-name>".
     * @var string
     */
    public $NS = 'http://www.w3.org/2005/Atom';

    /**
     * Local names whose value is stored as array(TYPE, serialized content).
     * @var array
     */
    public $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');

    /**
     * Local names whose value is stored as a plain string.
     * @var array
     */
    public $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft');

    /**
     * When true, _p() prints a trace and parse() keeps the raw input in $content.
     * @var bool
     */
    public $debug = false;

    /** @var int Current element nesting depth. */
    public $depth = 0;

    /** @var int Spaces per nesting level in debug output. */
    public $indent = 2;

    /**
     * Buffer for the content element being collected. Item 0 is
     * array(tag, depth, TYPE); later items are either array(tag, depth, markup)
     * records for nested tags or raw character-data strings.
     * Initialised to an empty array so count()/empty() are always safe.
     * @var array
     */
    public $in_content = array();

    /** @var array Stack (nearest first) of namespace declarations per open element. */
    public $ns_contexts = array();

    /** @var array Declarations reported by start_ns() for the element about to start. */
    public $ns_decls = array();

    /** @var array Namespace mappings that must be emitted on the current nested tag. */
    public $content_ns_decls = array();

    /** @var array Stack (nearest first) of mappings already emitted inside the content being serialized. */
    public $content_ns_contexts = array();

    public $is_xhtml = false;
    public $is_html = false;
    public $is_text = true;
    public $skipped_div = false;

    /** @var string Stream or path that parse() reads from. */
    public $FILE = "php://input";

    /** @var AtomFeed Parse result. */
    public $feed;

    /** @var AtomFeed|AtomEntry|null Object currently receiving parsed values. */
    public $current;

    /** @var callable Formats one attribute as name="value". */
    public $map_attrs_func;

    /** @var callable Formats one namespace mapping as an xmlns declaration. */
    public $map_xmlns_func;

    /** @var string|null Text of the last error captured while parse() was running. */
    public $error;

    /** @var string Raw input accumulated by parse() when $debug is on. */
    public $content = '';

    /**
     * Creates the empty feed and wires up the formatting callbacks.
     *
     * The callbacks used to be built with create_function(), which no longer
     * exists in PHP 8; they now point at the static helpers below.
     */
    public function __construct() {
        $this->feed = new AtomFeed();
        $this->current = null;
        $this->map_attrs_func = array(__CLASS__, 'map_attrs');
        $this->map_xmlns_func = array(__CLASS__, 'map_xmlns');
    }

    /**
     * PHP 4-style constructor, kept so existing `parent::AtomParser()` calls
     * keep working.
     */
    public function AtomParser() {
        self::__construct();
    }

    /**
     * Formats an attribute name/value pair as `name="value"`.
     *
     * @param string $k Attribute name.
     * @param string $v Attribute value (already escaped by the caller).
     * @return string
     */
    public static function map_attrs($k, $v) {
        return "$k=\"$v\"";
    }

    /**
     * Formats a namespace mapping as an `xmlns` / `xmlns:prefix` declaration.
     *
     * @param mixed $p Array key of the mapping (unused).
     * @param array $n array(prefix, uri); an empty/false prefix means the default namespace.
     * @return string
     */
    public static function map_xmlns($p, $n) {
        $xd = "xmlns";
        if (strlen((string) $n[0]) > 0) {
            $xd .= ":{$n[0]}";
        }
        return "{$xd}=\"{$n[1]}\"";
    }

    /**
     * Prints an indented debug line when $debug is enabled.
     *
     * @param string $msg
     */
    public function _p($msg) {
        if ($this->debug) {
            print str_repeat(" ", $this->depth * $this->indent) . $msg . "\n";
        }
    }

    /**
     * Error handler installed for the duration of parse(); remembers the
     * message in $error instead of letting PHP output it.
     */
    public function error_handler($log_level, $log_text, $error_file, $error_line) {
        $this->error = $log_text;
    }

    /**
     * Returns the object that parsed values should be written to.
     *
     * $current is null before the first feed/entry element and again after an
     * entry closes; in those cases values belong to the feed, so fall back to
     * it rather than dereferencing null.
     *
     * @return AtomFeed|AtomEntry
     */
    public function _target() {
        return ($this->current !== null) ? $this->current : $this->feed;
    }

    /**
     * Reads $FILE and populates $feed.
     *
     * @return bool True on success; false when the input cannot be opened or
     *              is not well-formed (the message is left in $error).
     */
    public function parse() {

        set_error_handler(array($this, 'error_handler'));

        array_unshift($this->ns_contexts, array());

        $parser = xml_parser_create_ns();
        // Bound callables are used instead of xml_set_object() + method-name
        // strings, which PHP 8.4 deprecates.
        xml_set_element_handler($parser, array($this, 'start_element'), array($this, 'end_element'));
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
        xml_set_character_data_handler($parser, array($this, 'cdata'));
        xml_set_default_handler($parser, array($this, '_default'));
        xml_set_start_namespace_decl_handler($parser, array($this, 'start_ns'));
        xml_set_end_namespace_decl_handler($parser, array($this, 'end_ns'));

        $this->content = '';

        $ret = true;

        $fp = fopen($this->FILE, "r");
        if ($fp === false) {
            // The warning raised by fopen() has already been stored in $error.
            $ret = false;
        } else {
            // Compare explicitly so that a chunk consisting of "0" does not end the loop.
            while (($data = fread($fp, 4096)) !== false && $data !== '') {
                if ($this->debug) {
                    $this->content .= $data;
                }

                if (!xml_parse($parser, $data, feof($fp))) {
                    // __() is the host application's translation function; fall
                    // back to the raw format string when it is not available.
                    $format = function_exists('__')
                        ? __('XML error: %s at line %d')
                        : 'XML error: %s at line %d';
                    trigger_error(sprintf($format . "\n",
                        xml_error_string(xml_get_error_code($parser)),
                        xml_get_current_line_number($parser)));
                    $ret = false;
                    break;
                }
            }
            fclose($fp);
        }

        // Parsers are objects from PHP 8.0 on and xml_parser_free() does nothing there.
        if (PHP_VERSION_ID < 80000) {
            xml_parser_free($parser);
        }
        unset($parser);

        restore_error_handler();

        return $ret;
    }

    /**
     * Element-start callback.
     *
     * Outside of a content element this records links/categories or opens a
     * new content buffer; inside one it re-serializes the nested start tag.
     *
     * @param mixed  $parser XML parser handle.
     * @param string $name   Qualified name as "namespace-uri:local-name".
     * @param array  $attrs  Attribute map.
     */
    public function start_element($parser, $name, $attrs) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        switch ($name) {
            case $this->NS . ':feed':
                $this->current = $this->feed;
                break;
            case $this->NS . ':entry':
                $this->current = new AtomEntry();
                break;
        }

        $this->_p("start_element('$name')");

        array_unshift($this->ns_contexts, $this->ns_decls);

        $this->depth++;

        if (!empty($this->in_content)) {

            $this->content_ns_decls = array();

            if ($this->is_html || $this->is_text) {
                trigger_error("Invalid content in element found. Content must not be of type text or html if it contains markup.");
            }

            $attrs_prefix = array();

            // resolve prefixes for attributes
            foreach ($attrs as $key => $value) {
                $with_prefix = $this->ns_to_prefix($key, true);
                $attrs_prefix[$with_prefix[1]] = $this->xml_escape($value);
            }

            $attrs_str = join(' ', array_map($this->map_attrs_func, array_keys($attrs_prefix), array_values($attrs_prefix)));
            if (strlen($attrs_str) > 0) {
                $attrs_str = " " . $attrs_str;
            }

            $with_prefix = $this->ns_to_prefix($name);

            // A null mapping means the element is in no known namespace, so
            // there is nothing to declare for it.
            if ($with_prefix[0] !== null && !$this->is_declared_content_ns($with_prefix[0])) {
                array_push($this->content_ns_decls, $with_prefix[0]);
            }

            // Always push a scope (possibly empty) so end_element() can pop
            // exactly one per nested element; a declaration therefore stays
            // visible only to the element that made it and its descendants.
            array_unshift($this->content_ns_contexts, $this->content_ns_decls);

            $xmlns_str = '';
            if (count($this->content_ns_decls) > 0) {
                $xmlns_str = " " . join(' ', array_map($this->map_xmlns_func, array_keys($this->content_ns_decls), array_values($this->content_ns_decls)));
            }

            array_push($this->in_content, array($tag, $this->depth, "<" . $with_prefix[1] . "{$xmlns_str}{$attrs_str}" . ">"));

        } else if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS)) {
            // The type attribute is optional; absence means plain text.
            $type_attr = isset($attrs['type']) ? $attrs['type'] : null;

            $this->in_content = array();
            $this->is_xhtml = ($type_attr === 'xhtml');
            $this->is_html = ($type_attr === 'html' || $type_attr === 'text/html');
            $this->is_text = ($type_attr === null || $type_attr === 'text');
            $type = $this->is_xhtml ? 'XHTML' : ($this->is_html ? 'HTML' : ($this->is_text ? 'TEXT' : $type_attr));

            if (array_key_exists('src', $attrs)) {
                // Out-of-line content: keep the attributes, collect nothing.
                $this->_target()->$tag = $attrs;
            } else {
                array_push($this->in_content, array($tag, $this->depth, $type));
            }
        } else if ($tag == 'link') {
            array_push($this->_target()->links, $attrs);
        } else if ($tag == 'category') {
            array_push($this->_target()->categories, $attrs);
        }

        $this->ns_decls = array();
    }

    /**
     * Element-end callback.
     *
     * Finishes the content buffer when the content element itself closes,
     * otherwise serializes the nested end tag (or collapses an empty element
     * to the self-closing form). Also appends a finished entry to the feed.
     *
     * @param mixed  $parser XML parser handle.
     * @param string $name   Qualified name as "namespace-uri:local-name".
     */
    public function end_element($parser, $name) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        # if we are *in* content, then let's proceed to serialize it
        if (!empty($this->in_content)) {
            $last = count($this->in_content) - 1;
            $head = $this->in_content[0];
            $tail = $this->in_content[$last];

            # if we are ending the original content element
            # then let's finalize the content
            if (is_array($head) && $head[0] == $tag && $head[1] == $this->depth) {
                $origtype = $head[2];
                array_shift($this->in_content);
                $newcontent = array();
                foreach ($this->in_content as $c) {
                    if (is_array($c) && count($c) == 3) {
                        // already-serialized tag markup
                        array_push($newcontent, $c[2]);
                    } else if ($this->is_xhtml || $this->is_text) {
                        array_push($newcontent, $this->xml_escape($c));
                    } else {
                        array_push($newcontent, $c);
                    }
                }
                $target = $this->_target();
                if (in_array($tag, $this->ATOM_CONTENT_ELEMENTS)) {
                    $target->$tag = array($origtype, join('', $newcontent));
                } else {
                    $target->$tag = join('', $newcontent);
                }
                $this->in_content = array();
                // Declarations made inside this content must not leak into the next one.
                $this->content_ns_contexts = array();
            } else if (is_array($tail) && $tail[0] == $tag && $tail[1] == $this->depth) {
                // The previous item is this element's own start tag, so the
                // element is empty: rewrite "<x ...>" as "<x .../>". The
                // is_array() guard keeps character data from being indexed
                // as if it were a tag record.
                $this->in_content[$last][2] = substr($tail[2], 0, -1) . "/>";
                array_shift($this->content_ns_contexts);
            } else {
                # else, just finalize the current element's content
                $endtag = $this->ns_to_prefix($name);
                array_push($this->in_content, array($tag, $this->depth, "</" . $endtag[1] . ">"));
                array_shift($this->content_ns_contexts);
            }
        }

        array_shift($this->ns_contexts);

        $this->depth--;

        if ($name == ($this->NS . ':entry')) {
            array_push($this->feed->entries, $this->current);
            $this->current = null;
        }

        $this->_p("end_element('$name')");
    }

    /**
     * Namespace-declaration callback; queues the mapping for the element
     * that is about to start.
     */
    public function start_ns($parser, $prefix, $uri) {
        $this->_p("starting: " . $prefix . ":" . $uri);
        array_push($this->ns_decls, array($prefix, $uri));
    }

    /**
     * Namespace-scope-end callback; only traced.
     */
    public function end_ns($parser, $prefix) {
        $this->_p("ending: #" . $prefix . "#");
    }

    /**
     * Character-data callback; appends text to the content buffer when one is open.
     */
    public function cdata($parser, $data) {
        $this->_p("data: #" . str_replace(array("\n"), array("\\n"), trim($data)) . "#");
        if (!empty($this->in_content)) {
            array_push($this->in_content, $data);
        }
    }

    /**
     * Default callback for anything without a dedicated handler; intentionally a no-op.
     */
    public function _default($parser, $data) {
    }

    /**
     * Translates an expanded name ("namespace-uri:local") back to prefixed form.
     *
     * @param string $qname Expanded element or attribute name.
     * @param bool   $attr  True for attribute names, which never pick up the
     *                      default namespace.
     * @return array array(mapping|null, name). The mapping is the
     *               array(prefix, uri) pair that was used, or null when none applies.
     */
    public function ns_to_prefix($qname, $attr = false) {
        # split 'http://www.w3.org/1999/xhtml:div' into ('http','//www.w3.org/1999/xhtml','div')
        $components = explode(":", $qname);

        # grab the last one (e.g 'div')
        $name = array_pop($components);

        if (!empty($components)) {
            # re-join back the namespace component
            $ns = join(":", $components);
            foreach ($this->ns_contexts as $context) {
                foreach ($context as $mapping) {
                    if ($mapping[1] == $ns && strlen((string) $mapping[0]) > 0) {
                        return array($mapping, "$mapping[0]:$name");
                    }
                }
            }
        }

        if (!$attr) {
            // Elements fall back to the nearest default-namespace declaration.
            foreach ($this->ns_contexts as $context) {
                foreach ($context as $mapping) {
                    if (strlen((string) $mapping[0]) == 0) {
                        return array($mapping, $name);
                    }
                }
            }
        }

        // No mapping applies: hand back the bare local name rather than
        // falling off the end and returning null.
        return array(null, $name);
    }

    /**
     * Tells whether a namespace mapping has already been emitted in the
     * content currently being serialized.
     *
     * @param array $new_mapping array(prefix, uri)
     * @return bool
     */
    public function is_declared_content_ns($new_mapping) {
        foreach ($this->content_ns_contexts as $context) {
            foreach ($context as $mapping) {
                if ($new_mapping == $mapping) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Escapes the five XML special characters.
     *
     * @param string $string
     * @return string
     */
    public function xml_escape($string)
    {
        return str_replace(array('&','"',"'",'<','>'),
            array('&amp;','&quot;','&apos;','&lt;','&gt;'),
            $string );
    }
}
mp-wp_genesis 353
mp-wp_genesis 354 ?>