比较两个大文件需要花费四个多小时

I have an online store that has about 15,000 products that get's updated everyday. Currently I upload the new list everyday, but it poses some issues (like downtime being a huge issue) and I wanted to come up with an alternative. I created a script that moves the "yesterdays" products list and downloads today's products list. Then I go line-by-line and compare the two files seeing what needs to be deleted, modified, are created. This will allow me to perform an update with minimal amount of work, no downtime since everything will happen behind the scenes via CRON job, and it's how it should done.

The problem I have is it takes over four hours for the process to happen and I'm not sure if what I'm doing is the most efficient way. My first thought is to write something in C++, but I'm not sure how much faster that would be compared to PHP.

My question(s) is:

• Is this the most efficient way to do this?

• Is PHP the best language to do this?

Here's my script I wrote that handles the download and comparison:

public function __construct($url, $user, $pass)
{
    $this->logger = new KLogger("/opt/lampp/htdocs/lea/logs/master.log" , KLogger::INFO);

    /* increase execution time and server memory limit */
    ini_set('max_execution_time', 14400);
    ini_set('memory_limit', '-1');

    /* set veriables */
    $this->ftp   = ftp_connect($url);
    $this->login = ftp_login($this->ftp, $user, $pass);

    $this->old = file('/opt/lampp/htdocs/lea/products/new/temp/rsr_inventory.txt');
    $this->new = file('/opt/lampp/htdocs/lea/products/new/rsr_inventory.txt');

    $this->list = array();

    $this->start_time = date('Hi');

    $this->counter = 0;
}

public function download($to, $from)
{
    // move current file to new location to get new file ready
    $this->logger->LogInfo('move yesterday\'s products list');
    rename('/opt/lampp/htdocs/lea/products/new/temp/rsr_inventory.txt', '/opt/lampp/htdocs/lea/products/new/rsr_inventory.txt');                

    // get list from rsr    
    $this->logger->LogInfo('get new list from rsr');        
    if(ftp_get($this->ftp, $to, $from, FTP_BINARY))
    {
        return true;
    }
    return false;
}

public function update()
{
    // initialize process
    $this->logger->LogInfo('update process initialized');

    for($i = 0; $i < count($this->new); $i++)
    {
        $new[$i] = explode(';', $this->new[$i]);
        $response = $this->_match($new[$i]);
        if($response[0])
        {
            if(trim($response[2]) != trim($new[$i][5]) || trim($response[3]) != trim($new[$i][8]))
            {
                $this->list[$this->counter][0] = $response[1];
                $this->list[$this->counter][1] = 'update';
                $this->list[$this->counter][2] = trim($response[2]);
                $this->list[$this->counter][3] = trim($response[3]);
                $this->counter++;
            }                               
        }
        else
        {
            $this->list[$this->counter][0] = $response[1];
            $this->list[$this->counter][1] = 'create';
            $this->list[$this->counter][2] = trim($response[2]);
            $this->list[$this->counter][3] = trim($response[3]);
            $this->counter++;
        }           
    }
    if(count($this->list) > 0)
    {
        //csv           
        $this->logger->LogInfo('create update.csv');

        $updates = fopen('/opt/lampp/htdocs/lea/products/new/updates.csv', 'w');
        foreach($this->list as $fields)
        {
            fputcsv($updates, $fields);
        }
        fclose($updates);
    }

    $this->logger->LogInfo('product update process complete');
    $this->__mail();

}

private function _match($item)
{       
    for($j = 0; $j < count($this->old); $j++)
    {
        $old[$j] = explode(';', $this->old[$j]);

        if($item[0] === $old[$j][0])
        {                               
            return array(true, $item[0], $old[$j][5], $old[$j][8]);
        }
    }       
    return array(false, NULL, NULL, NULL);
}

Here is an example of the products.txt file I get everyday (I'm only showing 10 products, but there are roughly 15,000 (there is a lot of things missing; prices, qty, and etc..., but I shortened everything up since it doesn't matter to show those) :

511-10010-019-L-XL;844802282208;5.11 RECON ANKLE SOCK BLK L/XL; 
511-10010-036-L-XL;844802282246;5.11 RECON ANKLE SOCK SHADOW L/XL; 
511-10010-132-LXL;844802334662;5.11 RECON ANKLE SOCK TIMBER L/XL;
511-10010-200-L-XL;844802282222;5.11 RECON ANKLE SOCK FATIGUE L/XL; 
511-10011-019-L-XL;844802276382;5.11 COLD WEATHER OTC SOCK BLK L/XL; 
511-10012-019-L-XL;844802276429;5.11 COLD WEATHER CREW SOCK BLK L/XL; 
511-30012-019-M;844802269650;5.11 WOMENS HOLSTER SHIRT BLK M; 
511-40011-010-L;844802016148;5.11 HOLSTER SHIRT L WHITE; 
511-40011-010-M;844802016131;5.11 HOLSTER SHIRT M WHITE; 
511-40011-010-XL;844802016155;5.11 HOLSTER SHIRT XL WHITE; 
511-40011-010-XXL;844802016162;5.11 HOLSTER SHIRT 2XL WHITE; 
查看全部
dongshai1944
dongshai1944
2015/12/20 20:01
  • server
  • c++
  • php
  • 点赞
  • 收藏
  • 回答
    私信
满意答案
查看全部

2个回复