Source for file Snoopy.class.inc

Documentation is available at Snoopy.class.inc

  1. <?php
  2.  
  3. /*************************************************
  4.  
  5. Snoopy - the PHP net client
  6. Author: Monte Ohrt <monte@ispi.net>
  7. Copyright (c): 1999-2000 ispi, all rights reserved
  8. Version: 1.0
  9.  
  10.  * This library is free software; you can redistribute it and/or
  11.  * modify it under the terms of the GNU Lesser General Public
  12.  * License as published by the Free Software Foundation; either
  13.  * version 2.1 of the License, or (at your option) any later version.
  14.  *
  15.  * This library is distributed in the hope that it will be useful,
  16.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18.  * Lesser General Public License for more details.
  19.  *
  20.  * You should have received a copy of the GNU Lesser General Public
  21.  * License along with this library; if not, write to the Free Software
  22.  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23.  
  24. You may contact the author of Snoopy by e-mail at:
  25. monte@ispi.net
  26.  
  27. Or, write to:
  28. Monte Ohrt
  29. CTO, ispi
  30. 237 S. 70th suite 220
  31. Lincoln, NE 68510
  32.  
  33. The latest version of Snoopy can be obtained from:
  34. http://snoopy.sourceforge.net
  35.  
  36. *************************************************/
  37.  
  38. class Snoopy
  39. {
  40.     /**** Public variables ****/
  41.     
  42.     /* user definable vars */
  43.  
  44.     var $host            =    "www.php.net";        // host name we are connecting to
  45.     var $port            =    80;                    // port we are connecting to
  46.     var $proxy_host        =    "";                    // proxy host to use
  47.     var $proxy_port        =    "";                    // proxy port to use
  48.     var $agent            =    "Snoopy v1.0";        // agent we masquerade as
  49.     var    $referer        =    "";                    // referer info to pass
  50.     var $cookies        =    array();            // array of cookies to pass
  51.                                                 // $cookies["username"]="joe";
  52.     var    $rawheaders        =    array();            // array of raw headers to send
  53.                                                 // $rawheaders["Content-type"]="text/html";
  54.  
  55.     var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow
  56.     var $lastredirectaddr    =    "";                // contains address of last redirected address
  57.     var    $offsiteok        =    true;                // allows redirection off-site
  58.     var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow
  59.     var $expandlinks    =    true;                // expand links to fully qualified URLs.
  60.                                                 // this only applies to fetchlinks()
  61.                                                 // or submitlinks()
  62.     var $passcookies    =    true;                // pass set cookies back through redirects
  63.                                                 // NOTE: this currently does not respect
  64.                                                 // dates, domains or paths.
  65.     
  66.     var    $user            =    "";                    // user for http authentication
  67.     var    $pass            =    "";                    // password for http authentication
  68.     
  69.     // http accept types
  70.     var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  71.     
  72.     var $results        =    "";                    // where the content is put
  73.         
  74.     var $error            =    "";                    // error messages sent here
  75.     var    $response_code    =    "";                    // response code returned from server
  76.     var    $headers        =    array();            // headers returned from server sent here
  77.     var    $maxlength        =    500000;                // max return data length (body)
  78.     var $read_timeout    =    0;                    // timeout on read operations, in seconds
  79.                                                 // supported only since PHP 4 Beta 4
  80.                                                 // set to 0 to disallow timeouts
  81.     var $timed_out        =    false;                // if a read operation timed out
  82.     var    $status            =    0;                    // http request status
  83.     
  84.     var    $curl_path        =    "/usr/bin/curl";
  85.                                                 // Snoopy will use cURL for fetching
  86.                                                 // SSL content if a full system path to
  87.                                                 // the cURL binary is supplied here.
  88.                                                 // set to false if you do not have
  89.                                                 // cURL installed. See http://curl.haxx.se
  90.                                                 // for details on installing cURL.
  91.                                                 // Snoopy does *not* use the cURL
  92.                                                 // library functions built into php,
  93.                                                 // as these functions are not stable
  94.                                                 // as of this Snoopy release.
  95.     
  96.     // send Accept-encoding: gzip?
  97.     var $use_gzip        = true;    
  98.     
  99.     /**** Private variables ****/    
  100.     
  101.     var    $_maxlinelen    =    4096;                // max line length (headers)
  102.     
  103.     var $_httpmethod    =    "GET";                // default http request method
  104.     var $_httpversion    =    "HTTP/1.0";            // default http request version
  105.     var $_submit_method    =    "POST";                // default submit method
  106.     var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
  107.     var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type
  108.     var $_redirectaddr    =    false;                // will be set if page fetched is a redirect
  109.     var $_redirectdepth    =    0;                    // increments on an http redirect
  110.     var $_frameurls        =     array();            // frame src urls
  111.     var $_framedepth    =    0;                    // increments on frame depth
  112.     
  113.     var $_isproxy        =    false;                // set if using a proxy server
  114.     var $_fp_timeout    =    30;                    // timeout for socket connection
  115.  
  116. /*======================================================================*\
  117.     Function:    fetch
  118.     Purpose:    fetch the contents of a web page
  119.                 (and possibly other protocols in the
  120.                 future like ftp, nntp, gopher, etc.)
  121.     Input:        $URI    the location of the page to fetch
  122.     Output:        $this->results    the output text from the fetch
  123. \*======================================================================*/
  124.  
  125.     function fetch($URI)
  126.     {
  127.     
  128.         //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
  129.         $URI_PARTS parse_url($URI);
  130.         if (!empty($URI_PARTS["user"]))
  131.             $this->user = $URI_PARTS["user"];
  132.         if (!empty($URI_PARTS["pass"]))
  133.             $this->pass = $URI_PARTS["pass"];
  134.                 
  135.         switch($URI_PARTS["scheme"])
  136.         {
  137.             case "http":
  138.                 $this->host = $URI_PARTS["host"];
  139.                 if(!empty($URI_PARTS["port"]))
  140.                     $this->port = $URI_PARTS["port"];
  141.                 if($this->_connect($fp))
  142.                 {
  143.                     if($this->_isproxy)
  144.                     {
  145.                         // using proxy, send entire URI
  146.                         $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
  147.                     }
  148.                     else
  149.                     {
  150.                         $path $URI_PARTS["path"].(isset($URI_PARTS["query"]"?".$URI_PARTS["query""");
  151.                         // no proxy, send only the path
  152.                         $this->_httprequest($path$fp$URI$this->_httpmethod);
  153.                     }
  154.                     
  155.                     $this->_disconnect($fp);
  156.  
  157.                     if($this->_redirectaddr)
  158.                     {
  159.                         /* url was redirected, check if we've hit the max depth */
  160.                         if($this->maxredirs > $this->_redirectdepth)
  161.                         {
  162.                             // only follow redirect if it's on this site, or offsiteok is true
  163.                             if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr|| $this->offsiteok)
  164.                             {
  165.                                 /* follow the redirect */
  166.                                 $this->_redirectdepth++;
  167.                                 $this->lastredirectaddr=$this->_redirectaddr;
  168.                                 $this->fetch($this->_redirectaddr);
  169.                             }
  170.                         }
  171.                     }
  172.  
  173.                     if($this->_framedepth < $this->maxframes && count($this->_frameurls0)
  174.                     {
  175.                         $frameurls $this->_frameurls;
  176.                         $this->_frameurls = array();
  177.                         
  178.                         while(list(,$frameurleach($frameurls))
  179.                         {
  180.                             if($this->_framedepth < $this->maxframes)
  181.                             {
  182.                                 $this->fetch($frameurl);
  183.                                 $this->_framedepth++;
  184.                             }
  185.                             else
  186.                                 break;
  187.                         }
  188.                     }                    
  189.                 }
  190.                 else
  191.                 {
  192.                     return false;
  193.                 }
  194.                 return true;                    
  195.                 break;
  196.             case "https":
  197.                 if(!$this->curl_path || (!is_executable($this->curl_path))) {
  198.                     $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
  199.                     return false;
  200.                 }
  201.                 $this->host = $URI_PARTS["host"];
  202.                 if(!empty($URI_PARTS["port"]))
  203.                     $this->port = $URI_PARTS["port"];
  204.                 if($this->_isproxy)
  205.                 {
  206.                     // using proxy, send entire URI
  207.                     $this->_httpsrequest($URI,$URI,$this->_httpmethod);
  208.                 }
  209.                 else
  210.                 {
  211.                     $path $URI_PARTS["path"].($URI_PARTS["query""?".$URI_PARTS["query""");
  212.                     // no proxy, send only the path
  213.                     $this->_httpsrequest($path$URI$this->_httpmethod);
  214.                 }
  215.  
  216.                 if($this->_redirectaddr)
  217.                 {
  218.                     /* url was redirected, check if we've hit the max depth */
  219.                     if($this->maxredirs > $this->_redirectdepth)
  220.                     {
  221.                         // only follow redirect if it's on this site, or offsiteok is true
  222.                         if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr|| $this->offsiteok)
  223.                         {
  224.                             /* follow the redirect */
  225.                             $this->_redirectdepth++;
  226.                             $this->lastredirectaddr=$this->_redirectaddr;
  227.                             $this->fetch($this->_redirectaddr);
  228.                         }
  229.                     }
  230.                 }
  231.  
  232.                 if($this->_framedepth < $this->maxframes && count($this->_frameurls0)
  233.                 {
  234.                     $frameurls $this->_frameurls;
  235.                     $this->_frameurls = array();
  236.  
  237.                     while(list(,$frameurleach($frameurls))
  238.                     {
  239.                         if($this->_framedepth < $this->maxframes)
  240.                         {
  241.                             $this->fetch($frameurl);
  242.                             $this->_framedepth++;
  243.                         }
  244.                         else
  245.                             break;
  246.                     }
  247.                 }                    
  248.                 return true;                    
  249.                 break;
  250.             default:
  251.                 // not a valid protocol
  252.                 $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
  253.                 return false;
  254.                 break;
  255.         }        
  256.         return true;
  257.     }
  258.  
  259.  
  260.  
  261. /*======================================================================*\
  262.     Private functions
  263. \*======================================================================*/
  264.     
  265.     
  266. /*======================================================================*\
  267.     Function:    _striplinks
  268.     Purpose:    strip the hyperlinks from an html document
  269.     Input:        $document    document to strip.
  270.     Output:        $match        an array of the links
  271. \*======================================================================*/
  272.  
  273.     function _striplinks($document)
  274.     {    
  275.         preg_match_all("'<\s*a\s+.*href\s*=\s*            # find <a href=
  276.                         ([\"\'])?                    # find single or double quote
  277.                         (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
  278.                                                     # quote, otherwise match up to next space
  279.                         'isx",$document,$links);
  280.                         
  281.  
  282.         // catenate the non-empty matches from the conditional subpattern
  283.  
  284.         while(list($key,$valeach($links[2]))
  285.         {
  286.             if(!empty($val))
  287.                 $match[$val;
  288.         }                
  289.         
  290.         while(list($key,$valeach($links[3]))
  291.         {
  292.             if(!empty($val))
  293.                 $match[$val;
  294.         }        
  295.         
  296.         // return the links
  297.         return $match;
  298.     }
  299.  
  300. /*======================================================================*\
  301.     Function:    _stripform
  302.     Purpose:    strip the form elements from an html document
  303.     Input:        $document    document to strip.
  304.     Output:        $match        the matched form elements as one string, joined by "\r\n"
  305. \*======================================================================*/
  306.  
  307.     function _stripform($document)
  308.     {    
  309.         preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
  310.         
  311.         // catenate the matches
  312.         $match implode("\r\n",$elements[0]);
  313.                 
  314.         // return the links
  315.         return $match;
  316.     }
  317.  
  318.     
  319.     
  320. /*======================================================================*\
  321.     Function:    _striptext
  322.     Purpose:    strip the text from an html document
  323.     Input:        $document    document to strip.
  324.     Output:        $text        the resulting text
  325. \*======================================================================*/
  326.  
  327.     function _striptext($document)
  328.     {
  329.         
  330.         // I didn't use preg eval (//e) since that is only available in PHP 4.0.
  331.         // so, list your entities one by one here. I included some of the
  332.         // more common ones.
  333.                                 
  334.         $search array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
  335.                         "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
  336.                         "'([\r\n])[\s]+'",                    // strip out white space
  337.                         "'&(quote|#34);'i",                    // replace html entities
  338.                         "'&(amp|#38);'i",
  339.                         "'&(lt|#60);'i",
  340.                         "'&(gt|#62);'i",
  341.                         "'&(nbsp|#160);'i",
  342.                         "'&(iexcl|#161);'i",
  343.                         "'&(cent|#162);'i",
  344.                         "'&(pound|#163);'i",
  345.                         "'&(copy|#169);'i"
  346.                         );                
  347.         $replace array(    "",
  348.                             "",
  349.                             "\\1",
  350.                             "\"",
  351.                             "&",
  352.                             "<",
  353.                             ">",
  354.                             " ",
  355.                             chr(161),
  356.                             chr(162),
  357.                             chr(163),
  358.                             chr(169));
  359.                     
  360.         $text preg_replace($search,$replace,$document);
  361.                                 
  362.         return $text;
  363.     }
  364.  
  365. /*======================================================================*\
  366.     Function:    _expandlinks
  367.     Purpose:    expand each link into a fully qualified URL
  368.     Input:        $links            the links to qualify
  369.                 $URI            the full URI to get the base from
  370.     Output:        $expandedLinks    the expanded links
  371. \*======================================================================*/
  372.  
  373.     function _expandlinks($links,$URI)
  374.     {
  375.         
  376.         preg_match("/^[^\?]+/",$URI,$match);
  377.  
  378.         $match preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
  379.                 
  380.         $search array(     "|^http://".preg_quote($this->host)."|i",
  381.                             "|^(?!http://)(\/)?(?!mailto:)|i",
  382.                             "|/\./|",
  383.                             "|/[^\/]+/\.\./|"
  384.                         );
  385.                         
  386.         $replace array(    "",
  387.                             $match."/",
  388.                             "/",
  389.                             "/"
  390.                         );            
  391.                 
  392.         $expandedLinks preg_replace($search,$replace,$links);
  393.  
  394.         return $expandedLinks;
  395.     }
  396.  
  397. /*======================================================================*\
  398.     Function:    _httprequest
  399.     Purpose:    go get the http data from the server
  400.     Input:        $url        the url to fetch
  401.                 $fp            the current open file pointer
  402.                 $URI        the full URI
  403.                 $body        body contents to send if any (POST)
  404.     Output:        
  405. \*======================================================================*/
  406.     
  407.     function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
  408.     {
  409.         if($this->passcookies && $this->_redirectaddr)
  410.             $this->setcookies();
  411.             
  412.         $URI_PARTS parse_url($URI);
  413.         if(empty($url))
  414.             $url "/";
  415.         $headers $http_method." ".$url." ".$this->_httpversion."\r\n";        
  416.         if(!empty($this->agent))
  417.             $headers .= "User-Agent: ".$this->agent."\r\n";
  418.         if(!empty($this->host&& !isset($this->rawheaders['Host']))
  419.             $headers .= "Host: ".$this->host."\r\n";
  420.         if(!empty($this->accept))
  421.             $headers .= "Accept: ".$this->accept."\r\n";
  422.         
  423.         if($this->use_gzip{
  424.             // make sure PHP was built with --with-zlib
  425.             // and we can handle gzipp'ed data
  426.             if function_exists(gzinflate) ) {
  427.                $headers .= "Accept-encoding: gzip\r\n";
  428.             }
  429.             else {
  430.                trigger_error(
  431.                    "use_gzip is on, but PHP was built without zlib support.".
  432.                 "  Requesting file(s) without gzip encoding."
  433.                 E_USER_NOTICE);
  434.             }
  435.         }
  436.         
  437.         if(!empty($this->referer))
  438.             $headers .= "Referer: ".$this->referer."\r\n";
  439.         if(!empty($this->cookies))
  440.         {            
  441.             if(!is_array($this->cookies))
  442.                 $this->cookies = (array)$this->cookies;
  443.     
  444.             reset($this->cookies);
  445.             if count($this->cookies{
  446.                 $cookie_headers .= 'Cookie: ';
  447.                 foreach $this->cookies as $cookieKey => $cookieVal {
  448.                 $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
  449.                 }
  450.                 $headers .= substr($cookie_headers,0,-2"\r\n";
  451.             
  452.         }
  453.         if(!empty($this->rawheaders))
  454.         {
  455.             if(!is_array($this->rawheaders))
  456.                 $this->rawheaders = (array)$this->rawheaders;
  457.             while(list($headerKey,$headerValeach($this->rawheaders))
  458.                 $headers .= $headerKey.": ".$headerVal."\r\n";
  459.         }
  460.         if(!empty($content_type)) {
  461.             $headers .= "Content-type$content_type";
  462.             if ($content_type == "multipart/form-data")
  463.                 $headers .= "; boundary=".$this->_mime_boundary;
  464.             $headers .= "\r\n";
  465.         }
  466.         if(!empty($body))    
  467.             $headers .= "Content-length: ".strlen($body)."\r\n";
  468.         if(!empty($this->user|| !empty($this->pass))    
  469.             $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";
  470.  
  471.         $headers .= "\r\n";
  472.         
  473.         // set the read timeout if needed
  474.         if ($this->read_timeout > 0)
  475.             socket_set_timeout($fp$this->read_timeout);
  476.         $this->timed_out = false;
  477.         
  478.         fwrite($fp,$headers.$body,strlen($headers.$body));
  479.         
  480.         $this->_redirectaddr = false;
  481.         unset($this->headers);
  482.         
  483.         // content was returned gzip encoded?
  484.         $is_gzipped false;
  485.                         
  486.         while($currentHeader fgets($fp,$this->_maxlinelen))
  487.         {
  488.             if ($this->read_timeout > && $this->_check_timeout($fp))
  489.             {
  490.                 $this->status=-100;
  491.                 return false;
  492.             }
  493.                 
  494.         //    if($currentHeader == "\r\n")
  495.             if(preg_match("/^\r?\n$/"$currentHeader) )
  496.                   break;
  497.                         
  498.             // if a header begins with Location: or URI:, set the redirect
  499.             if(preg_match("/^(Location:|URI:)/i",$currentHeader))
  500.             {
  501.                 // get URL portion of the redirect
  502.                 preg_match("/^(Location:|URI:)\s+(.*)/",chop($currentHeader),$matches);
  503.                 // look for :// in the Location header to see if hostname is included
  504.                 if(!preg_match("|\:\/\/|",$matches[2]))
  505.                 {
  506.                     // no host in the path, so prepend
  507.                     $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
  508.                     // eliminate double slash
  509.                     if(!preg_match("|^/|",$matches[2]))
  510.                             $this->_redirectaddr .= "/".$matches[2];
  511.                     else
  512.                             $this->_redirectaddr .= $matches[2];
  513.                 }
  514.                 else
  515.                     $this->_redirectaddr = $matches[2];
  516.             }
  517.         
  518.             if(preg_match("|^HTTP/|",$currentHeader))
  519.             {
  520.                 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader$status))
  521.                 {
  522.                     $this->status$status[1];
  523.                 }                
  524.                 $this->response_code = $currentHeader;
  525.             }
  526.             
  527.             if (preg_match("/Content-Encoding: gzip/"$currentHeader) ) {
  528.                 $is_gzipped true;
  529.             }
  530.             
  531.             $this->headers[$currentHeader;
  532.         }
  533.  
  534.         # $results = fread($fp, $this->maxlength);
  535.         $results "";
  536.         while $data fread($fp$this->maxlength) ) {
  537.             $results .= $data;
  538.             if (
  539.                 strlen($results$this->maxlength {
  540.                 break;
  541.             }
  542.         }
  543.         
  544.         // gunzip
  545.         if $is_gzipped {
  546.             // per http://www.php.net/manual/en/function.gzencode.php
  547.             $results substr($results10);
  548.             $results gzinflate($results);
  549.         }
  550.         
  551.         if ($this->read_timeout > && $this->_check_timeout($fp))
  552.         {
  553.             $this->status=-100;
  554.             return false;
  555.         }
  556.         
  557.         // check if there is a a redirect meta tag
  558.         
  559.         if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
  560.         {
  561.             $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
  562.         }
  563.  
  564.         // have we hit our frame depth and is there frame src to fetch?
  565.         if(($this->_framedepth < $this->maxframes&& preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
  566.         {
  567.             $this->results[$results;
  568.             for($x=0$x<count($match[1])$x++)
  569.                 $this->_frameurls[$this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
  570.         }
  571.         // have we already fetched framed content?
  572.         elseif(is_array($this->results))
  573.             $this->results[$results;
  574.         // no framed content
  575.         else
  576.             $this->results = $results;
  577.         
  578.         return true;
  579.     }
  580.  
  581. /*======================================================================*\
  582.     Function:    _httpsrequest
  583.     Purpose:    go get the https data from the server using curl
  584.     Input:        $url        the url to fetch
  585.                 $URI        the full URI
  586.                 $body        body contents to send if any (POST)
  587.     Output:        
  588. \*======================================================================*/
  589.     
  590.     function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  591.     {
  592.         if($this->passcookies && $this->_redirectaddr)
  593.             $this->setcookies();
  594.  
  595.         $headers array();        
  596.                     
  597.         $URI_PARTS parse_url($URI);
  598.         if(empty($url))
  599.             $url "/";
  600.         // GET ... header not needed for curl
  601.         //$headers[] = $http_method." ".$url." ".$this->_httpversion;        
  602.         if(!empty($this->agent))
  603.             $headers["User-Agent: ".$this->agent;
  604.         if(!empty($this->host))
  605.             $headers["Host: ".$this->host;
  606.         if(!empty($this->accept))
  607.             $headers["Accept: ".$this->accept;
  608.         if(!empty($this->referer))
  609.             $headers["Referer: ".$this->referer;
  610.         if(!empty($this->cookies))
  611.         {            
  612.             if(!is_array($this->cookies))
  613.                 $this->cookies = (array)$this->cookies;
  614.     
  615.             reset($this->cookies);
  616.             if count($this->cookies{
  617.                 $cookie_str 'Cookie: ';
  618.                 foreach $this->cookies as $cookieKey => $cookieVal {
  619.                 $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
  620.                 }
  621.                 $headers[substr($cookie_str,0,-2);
  622.             }
  623.         }
  624.         if(!empty($this->rawheaders))
  625.         {
  626.             if(!is_array($this->rawheaders))
  627.                 $this->rawheaders = (array)$this->rawheaders;
  628.             while(list($headerKey,$headerValeach($this->rawheaders))
  629.                 $headers[$headerKey.": ".$headerVal;
  630.         }
  631.         if(!empty($content_type)) {
  632.             if ($content_type == "multipart/form-data")
  633.                 $headers["Content-type$content_typeboundary=".$this->_mime_boundary;
  634.             else
  635.                 $headers["Content-type$content_type";
  636.         }
  637.         if(!empty($body))    
  638.             $headers["Content-length: ".strlen($body);
  639.         if(!empty($this->user|| !empty($this->pass))    
  640.             $headers["Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
  641.             
  642.         for($curr_header 0$curr_header count($headers)$curr_header++{
  643.             $cmdline_params .= " -H \"".$headers[$curr_header]."\"";
  644.         }
  645.                                            
  646.         if(!empty($body))
  647.             $cmdline_params .= " -d \"$body\"";
  648.         
  649.         if($this->read_timeout > 0)
  650.             $cmdline_params .= " -m ".$this->read_timeout;
  651.         
  652.         $headerfile uniqid(time());
  653.         
  654.         # accept self-signed certs
  655.         $cmdline_params .= " -k"
  656.         exec($this->curl_path." -D \"/tmp/$headerfile\"".escapeshellcmd($cmdline_params)." ".escapeshellcmd($URI),$results,$return);
  657.         
  658.         if($return)
  659.         {
  660.             $this->error = "ErrorcURL could not retrieve the documenterror $return.";
  661.             return false;
  662.         }
  663.             
  664.             
  665.         $results implode("\r\n",$results);
  666.         
  667.         $result_headers file("/tmp/$headerfile");
  668.                         
  669.         $this->_redirectaddr = false;
  670.         unset($this->headers);
  671.                         
  672.         for($currentHeader 0$currentHeader count($result_headers)$currentHeader++)
  673.         {
  674.             
  675.             // if a header begins with Location: or URI:, set the redirect
  676.             if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
  677.             {
  678.                 // get URL portion of the redirect
  679.                 preg_match("/^(Location: |URI:)(.*)/",chop($result_headers[$currentHeader]),$matches);
  680.                 // look for :// in the Location header to see if hostname is included
  681.                 if(!preg_match("|\:\/\/|",$matches[2]))
  682.                 {
  683.                     // no host in the path, so prepend
  684.                     $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
  685.                     // eliminate double slash
  686.                     if(!preg_match("|^/|",$matches[2]))
  687.                             $this->_redirectaddr .= "/".$matches[2];
  688.                     else
  689.                             $this->_redirectaddr .= $matches[2];
  690.                 }
  691.                 else
  692.                     $this->_redirectaddr = $matches[2];
  693.             }
  694.         
  695.             if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
  696.             {
  697.                 $this->response_code = $result_headers[$currentHeader];
  698.                 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code$match))
  699.                 {
  700.                 $this->status$match[1];
  701.                         }
  702.             }
  703.             $this->headers[$result_headers[$currentHeader];
  704.         }
  705.  
  706.         // check if there is a redirect meta tag
  707.         
  708.         if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
  709.         {
  710.             $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
  711.         }
  712.  
  713.         // have we hit our frame depth and is there frame src to fetch?
  714.         if(($this->_framedepth < $this->maxframes&& preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
  715.         {
  716.             $this->results[$results;
  717.             for($x=0$x<count($match[1])$x++)
  718.                 $this->_frameurls[$this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
  719.         }
  720.         // have we already fetched framed content?
  721.         elseif(is_array($this->results))
  722.             $this->results[$results;
  723.         // no framed content
  724.         else
  725.             $this->results = $results;
  726.  
  727.         unlink("/tmp/$headerfile");
  728.         
  729.         return true;
  730.     }
  731.  
  732. /*======================================================================*\
  733.     Function:    setcookies()
  734.     Purpose:    set cookies for a redirection
  735. \*======================================================================*/
  736.     
  737.     function setcookies()
  738.     {
  739.         for($x=0$x<count($this->headers)$x++)
  740.         {
  741.         if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i"$this->headers[$x],$match))
  742.             $this->cookies[$match[1]] $match[2];
  743.         }
  744.     }
  745.  
  746.     
  747. /*======================================================================*\
  748.     Function:    _check_timeout
  749.     Purpose:    checks whether timeout has occurred
  750.     Input:        $fp    file pointer
  751. \*======================================================================*/
  752.  
  753.     function _check_timeout($fp)
  754.     {
  755.         if ($this->read_timeout > 0{
  756.             $fp_status socket_get_status($fp);
  757.             if ($fp_status["timed_out"]{
  758.                 $this->timed_out = true;
  759.                 return true;
  760.             }
  761.         }
  762.         return false;
  763.     }
  764.  
  765. /*======================================================================*\
  766.     Function:    _connect
  767.     Purpose:    make a socket connection
  768.     Input:        $fp    file pointer
  769. \*======================================================================*/
  770.     
  771.     function _connect(&$fp)
  772.     {
  773.         if(!empty($this->proxy_host&& !empty($this->proxy_port))
  774.             {
  775.                 $this->_isproxy = true;
  776.                 $host $this->proxy_host;
  777.                 $port $this->proxy_port;
  778.             }
  779.         else
  780.         {
  781.             $host $this->host;
  782.             $port $this->port;
  783.         }
  784.     
  785.         $this->status = 0;
  786.         
  787.         if($fp fsockopen(
  788.                     $host,
  789.                     $port,
  790.                     $errno,
  791.                     $errstr,
  792.                     $this->_fp_timeout
  793.                     ))
  794.         {
  795.             // socket connection succeeded
  796.  
  797.             return true;
  798.         }
  799.         else
  800.         {
  801.             // socket connection failed
  802.             $this->status = $errno;
  803.             switch($errno)
  804.             {
  805.                 case -3:
  806.                     $this->error="socket creation failed (-3)";
  807.                 case -4:
  808.                     $this->error="dns lookup failure (-4)";
  809.                 case -5:
  810.                     $this->error="connection refused or timed out (-5)";
  811.                 default:
  812.                     $this->error="connection failed (".$errno.")";
  813.             }
  814.             return false;
  815.         }
  816.     }
  817. /*======================================================================*\
  818.     Function:    _disconnect
  819.     Purpose:    disconnect a socket connection
  820.     Input:        $fp    file pointer
  821. \*======================================================================*/
  822.     
  823.     function _disconnect($fp)
  824.     {
  825.         return(fclose($fp));
  826.     }
  827.  
  828.     
  829. /*======================================================================*\
  830.     Function:    _prepare_post_body
  831.     Purpose:    Prepare post body according to encoding type
  832.     Input:        $formvars  - form variables
  833.                 $formfiles - form upload files
  834.     Output:        post body
  835. \*======================================================================*/
  836.     
  837.     function _prepare_post_body($formvars$formfiles)
  838.     {
  839.         settype($formvars"array");
  840.         settype($formfiles"array");
  841.  
  842.         if (count($formvars== && count($formfiles== 0)
  843.             return;
  844.         
  845.         switch ($this->_submit_type{
  846.             case "application/x-www-form-urlencoded":
  847.                 reset($formvars);
  848.                 while(list($key,$valeach($formvars)) {
  849.                     if (is_array($val|| is_object($val)) {
  850.                         while (list($cur_key$cur_valeach($val)) {
  851.                             $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
  852.                         }
  853.                     else
  854.                         $postdata .= urlencode($key)."=".urlencode($val)."&";
  855.                 }
  856.                 break;
  857.  
  858.             case "multipart/form-data":
  859.                 $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
  860.                 
  861.                 reset($formvars);
  862.                 while(list($key,$valeach($formvars)) {
  863.                     if (is_array($val|| is_object($val)) {
  864.                         while (list($cur_key$cur_valeach($val)) {
  865.                             $postdata .= "--".$this->_mime_boundary."\r\n";
  866.                             $postdata .= "Content-Dispositionform-dataname=\"$key\[\]\"\r\n\r\n";
  867.                             $postdata .= "$cur_val\r\n";
  868.                         }
  869.                     else {
  870.                         $postdata .= "--".$this->_mime_boundary."\r\n";
  871.                         $postdata .= "Content-Dispositionform-dataname=\"$key\"\r\n\r\n";
  872.                         $postdata .= "$val\r\n";
  873.                     }
  874.                 }
  875.                 
  876.                 reset($formfiles);
  877.                 while (list($field_name$file_nameseach($formfiles)) {
  878.                     settype($file_names"array");
  879.                     while (list($file_nameeach($file_names)) {
  880.                         if (!is_readable($file_name)) continue;
  881.  
  882.                         $fp fopen($file_name"r");
  883.                         $file_content fread($fpfilesize($file_name));
  884.                         fclose($fp);
  885.                         $base_name basename($file_name);
  886.  
  887.                         $postdata .= "--".$this->_mime_boundary."\r\n";
  888.                         $postdata .= "Content-Dispositionform-dataname=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
  889.                         $postdata .= "$file_content\r\n";
  890.                     }
  891.                 }
  892.                 $postdata .= "--".$this->_mime_boundary."--\r\n";
  893.                 break;
  894.         }
  895.  
  896.         return $postdata;
  897.     }
  898. }
  899.  
  900. ?>

Documentation generated on Tue, 01 May 2007 16:47:18 +0200 by phpDocumentor 1.3.2