dongyi1939 2015-10-27 19:49
浏览 67
已采纳

PHP Pthread似乎有随机内存管理和内存泄漏[关闭]

So, I have hit a dead-end here. I have tried everything I know to isolate a memory leak and from what I've gathered, it seems to be related to the use of pthread for multithreading this script.

I am in the process of writing a bot for Wikipedia, and I am nearing completion. Functionally, the program is sound, and works as expected in both multithreading and single-threading. The memory leak only occurs when multi-threading is switched on.

Both versions use exactly the same functions on the same script/file, to allow for easy/consistent debugging.

The engine that threads is below.

//Multithread engine

/**
 * Thread that runs an arbitrary callable asynchronously.
 *
 * This is useful for functions that consume time and can run in the background.
 * Caution must be exercised to ensure that the called functions are thread safe.
 */
class AsyncFunctionCall extends Thread {

    /** @var callable Function invoked by run(). */
    protected $method;
    /** @var array Arguments handed to the callable through call_user_func_array(). */
    protected $params;
    /** @var mixed Return value of the callable; null until run() has stored it. */
    public $result;

    /**
     * @param callable $method Function to invoke from run().
     * @param array    $params Arguments for $method.
     */
    public function __construct( $method, $params ) {
        $this->method = $method;
        $this->params = $params;
        $this->result = null;
    }

    /**
     * Invokes the callable and stores whatever it returns in $result.
     *
     * @return bool true when the callable returned a truthy value, false otherwise.
     */
    public function run() {
        $value = call_user_func_array( $this->method, $this->params );
        $this->result = $value;
        return (bool) $value;
    }

    /**
     * Creates a thread for the given callable and starts it.
     *
     * @param callable $method Function to run in the background.
     * @param array    $params Arguments for $method.
     * @return AsyncFunctionCall|false The started thread, or false (after printing a
     *                                 message) when start() reports failure.
     */
    public static function call($method, $params){
        $thread = new AsyncFunctionCall($method, $params);
        if( $thread->start() ) {
            return $thread;
        }

        // $method is not necessarily a string: interpolating an array callable prints
        // "Array" with a notice, and a Closure cannot be converted to a string at all.
        // is_callable() in syntax-only mode fills in a printable name for every form
        // and returns the string itself for plain function names.
        $callableName = '';
        is_callable( $method, true, $callableName );
        echo "Unable to initiate background function $callableName!
";
        return false;
    }
}

/**
 * Task that analyzes (and, where needed, edits) one page; several of these are meant
 * to be submitted to a Pool so that multiple pages are processed simultaneously.
 *
 * The constructor arguments are stored unchanged and forwarded, in the same order,
 * to analyzePage() when the task runs. Whatever analyzePage() returns is left in
 * $result.
 */
class ThreadedBot extends Collectable {

    protected $page,
        $pageid,
        $alreadyArchived,
        $ARCHIVE_ALIVE,
        $TAG_OVERRIDE,
        $ARCHIVE_BY_ACCESSDATE,
        $TOUCH_ARCHIVE,
        $DEAD_ONLY,
        $NOTIFY_ERROR_ON_TALK,
        $NOTIFY_ON_TALK,
        $TALK_MESSAGE_HEADER,
        $TALK_MESSAGE,
        $TALK_ERROR_MESSAGE_HEADER,
        $TALK_ERROR_MESSAGE,
        $DEADLINK_TAGS,
        $CITATION_TAGS,
        $IGNORE_TAGS,
        $ARCHIVE_TAGS,
        $VERIFY_DEAD,
        $LINK_SCAN;

    /** @var mixed Value returned by analyzePage(); written by run(). */
    public $result;

    /**
     * Names of the stored analyzePage() arguments, in constructor/call order.
     *
     * @return string[]
     */
    private static function argumentNames() {
        return array(
            'page',
            'pageid',
            'alreadyArchived',
            'ARCHIVE_ALIVE',
            'TAG_OVERRIDE',
            'ARCHIVE_BY_ACCESSDATE',
            'TOUCH_ARCHIVE',
            'DEAD_ONLY',
            'NOTIFY_ERROR_ON_TALK',
            'NOTIFY_ON_TALK',
            'TALK_MESSAGE_HEADER',
            'TALK_MESSAGE',
            'TALK_ERROR_MESSAGE_HEADER',
            'TALK_ERROR_MESSAGE',
            'DEADLINK_TAGS',
            'CITATION_TAGS',
            'IGNORE_TAGS',
            'ARCHIVE_TAGS',
            'VERIFY_DEAD',
            'LINK_SCAN',
        );
    }

    /**
     * Stores every argument in the property of the same name.
     */
    public function __construct($page, $pageid, $alreadyArchived, $ARCHIVE_ALIVE, $TAG_OVERRIDE, $ARCHIVE_BY_ACCESSDATE, $TOUCH_ARCHIVE, $DEAD_ONLY, $NOTIFY_ERROR_ON_TALK, $NOTIFY_ON_TALK, $TALK_MESSAGE_HEADER, $TALK_MESSAGE, $TALK_ERROR_MESSAGE_HEADER, $TALK_ERROR_MESSAGE, $DEADLINK_TAGS, $CITATION_TAGS, $IGNORE_TAGS, $ARCHIVE_TAGS, $VERIFY_DEAD, $LINK_SCAN) {
        // compact() picks the parameters up by name, so each one lands in the
        // identically named property, in declaration order.
        foreach( compact( self::argumentNames() ) as $name => $value ) {
            $this->$name = $value;
        }
    }

    /**
     * Raises the memory limit, prints the limit and current usage, runs analyzePage()
     * with the stored arguments, then marks the task as garbage and drops the stored
     * arguments.
     */
    public function run() {
        ini_set( 'memory_limit', '1G' );
        $limit = ini_get( 'memory_limit' );
        $usedMegabytes = memory_get_usage( true ) / 1024 / 1024;
        echo $limit."; ".$usedMegabytes." MB
";

        $this->result = analyzePage(
            $this->page,
            $this->pageid,
            $this->alreadyArchived,
            $this->ARCHIVE_ALIVE,
            $this->TAG_OVERRIDE,
            $this->ARCHIVE_BY_ACCESSDATE,
            $this->TOUCH_ARCHIVE,
            $this->DEAD_ONLY,
            $this->NOTIFY_ERROR_ON_TALK,
            $this->NOTIFY_ON_TALK,
            $this->TALK_MESSAGE_HEADER,
            $this->TALK_MESSAGE,
            $this->TALK_ERROR_MESSAGE_HEADER,
            $this->TALK_ERROR_MESSAGE,
            $this->DEADLINK_TAGS,
            $this->CITATION_TAGS,
            $this->IGNORE_TAGS,
            $this->ARCHIVE_TAGS,
            $this->VERIFY_DEAD,
            $this->LINK_SCAN
        );

        // setGarbage() is inherited from Collectable; presumably it is what a
        // collector's isGarbage() check reports afterwards.
        $this->setGarbage();

        // Release the inputs: first overwrite every one with null, then remove the
        // properties altogether. Only $result is kept.
        $names = self::argumentNames();
        foreach( $names as $name ) {
            $this->$name = null;
        }
        foreach( $names as $name ) {
            unset( $this->$name );
        }
    }
}

This block here in the body of the program calls the threading engine.

// Analyze every page, either sequentially in this process or on a pool of worker
// threads, and fold each page's statistics into the running totals.
if( WORKERS === false ) {
    foreach( $pages as $tid => $tpage ) {
        $pagesAnalyzed++;
        $stats = analyzePage( $tpage['title'], $tpage['pageid'], $alreadyArchived, $ARCHIVE_ALIVE, $TAG_OVERRIDE, $ARCHIVE_BY_ACCESSDATE, $TOUCH_ARCHIVE, $DEAD_ONLY, $NOTIFY_ERROR_ON_TALK, $NOTIFY_ON_TALK, $TALK_MESSAGE_HEADER, $TALK_MESSAGE, $TALK_ERROR_MESSAGE_HEADER, $TALK_ERROR_MESSAGE, $DEADLINK_TAGS, $CITATION_TAGS, $IGNORE_TAGS, $ARCHIVE_TAGS, $VERIFY_DEAD, $LINK_SCAN );
        if( $stats['pagemodified'] === true ) $pagesModified++;
        $linksAnalyzed += $stats['linksanalyzed'];
        $linksArchived += $stats['linksarchived'];
        $linksFixed += $stats['linksrescued'];
        $linksTagged += $stats['linkstagged'];
        $alreadyArchived = array_merge( $stats['newlyArchived'], $alreadyArchived );
        $failedToArchive = array_merge( $failedToArchive, $stats['archiveProblems'] );
        $allerrors = array_merge( $allerrors, $stats['errors'] );
        // Persist the list of archived links after every page.
        file_put_contents( $dlaaLocation, serialize( $alreadyArchived ) );
    }
} else {
    // Queue one ThreadedBot job per page on a pool of $workerLimit workers.
    $workerQueue = new Pool( $workerLimit );
    foreach( $pages as $tid => $tpage ) {
        $pagesAnalyzed++;
        echo "Submitted {$tpage['title']}, job ".($tid+1)." for analyzing...
";
        $workerQueue->submit( new ThreadedBot( $tpage['title'], $tpage['pageid'], $alreadyArchived, $ARCHIVE_ALIVE, $TAG_OVERRIDE, $ARCHIVE_BY_ACCESSDATE, $TOUCH_ARCHIVE, $DEAD_ONLY, $NOTIFY_ERROR_ON_TALK, $NOTIFY_ON_TALK, $TALK_MESSAGE_HEADER, $TALK_MESSAGE, $TALK_ERROR_MESSAGE_HEADER, $TALK_ERROR_MESSAGE, $DEADLINK_TAGS, $CITATION_TAGS, $IGNORE_TAGS, $ARCHIVE_TAGS, $VERIFY_DEAD, $LINK_SCAN ) );
    }

    // The per-job results are only read after shutdown() has returned.
    $workerQueue->shutdown();
    $workerQueue->collect(
        function( $thread ) {
            global $pagesModified, $linksAnalyzed, $linksArchived, $linksFixed, $linksTagged, $alreadyArchived, $failedToArchive, $allerrors;
            $stats = $thread->result;

            // A job that never reached the assignment of its result (for example
            // because its worker died with a fatal error) has no statistics array.
            // Feeding that into array_merge() would replace $alreadyArchived with
            // null (or throw on PHP 8), and the serialized list written after the
            // collection would then be lost. Skip such jobs instead.
            if( !is_array( $stats ) ) {
                echo "A job finished without a result; its statistics were skipped.\n";
                return $thread->isGarbage();
            }

            if( $stats['pagemodified'] === true ) $pagesModified++;
            $linksAnalyzed += $stats['linksanalyzed'];
            $linksArchived += $stats['linksarchived'];
            $linksFixed += $stats['linksrescued'];
            $linksTagged += $stats['linkstagged'];
            $alreadyArchived = array_merge( $stats['newlyArchived'], $alreadyArchived );
            $failedToArchive = array_merge( $failedToArchive, $stats['archiveProblems'] );
            $allerrors = array_merge( $allerrors, $stats['errors'] );
            return $thread->isGarbage();
        }
    );
    echo "!!!!!!!!!!!!!!Links analyzed so far: $linksAnalyzed

";
    // Persist the list of archived links once all jobs have been collected.
    file_put_contents( $dlaaLocation, serialize( $alreadyArchived ) );
}

As you can see above, the if statement decides whether to multithread or single-thread. Some notes: $workerLimit = 20; all resources initialized in functions are closed, nullified, and unset; there is no memory leak as a result of function calls; memory_limit has been confirmed to be at 1G; workers will eventually crash with an OOM fatal error; the memory allocation seems to be randomly assigned among the workers; each worker gradually uses more and more memory; the script itself goes to 700 MB according to Task Manager before crashing; and finally, the more workers I add, the faster each worker crashes, with 100 workers crashing immediately.

Here's a segment of the output.

Analyzed Stanley Hartt (8742961)
Rescued: 0; Tagged dead: 0; Archived: 0; Max System Memory Used: 1.25 MB

PHP Fatal error:  Out of memory (allocated 46661632) (tried to allocate 6557907 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259

Fatal error: Out of memory (allocated 46661632) (tried to allocate 6557907 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259
Analyzed High-explosive anti-tank warhead (255968)
Rescued: 0; Tagged dead: 0; Archived: 5; Max System Memory Used: 22.75 MB

PHP Fatal error:  Out of memory (allocated 14680064) (tried to allocate 6341940 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1261

Fatal error: Out of memory (allocated 14680064) (tried to allocate 6341940 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1261
PHP Fatal error:  Out of memory (allocated 6291456) (tried to allocate 5243257 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259

Fatal error: Out of memory (allocated 6291456) (tried to allocate 5243257 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259
PHP Fatal error:  Out of memory (allocated 7864320) (tried to allocate 5245685 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259

Fatal error: Out of memory (allocated 7864320) (tried to allocate 5245685 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259
Analyzed Nadezhda Tylik (2896780)
Rescued: 0; Tagged dead: 0; Archived: 5; Max System Memory Used: 2.75 MB

This is my first time multithreading, so I would appreciate any help and suggestions. If you have more questions, just ask. :-)

  • 写回答

1条回答 默认 最新

  • dsms21398 2017-06-17 18:39
    关注

    So it turns out this wasn't coming from pthreads; multithreading just made the issue more visible. I was using multi-cURL and, because I used the wrong function to close the handles, the memory wasn't being freed even though the handles were closed.

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 ads仿真结果在圆图上是怎么读数的
  • ¥20 Cotex M3的调试和程序执行方式是什么样的?
  • ¥20 java项目连接sqlserver时报ssl相关错误
  • ¥15 一道python难题3
  • ¥15 用matlab 设计一个不动点迭代法求解非线性方程组的代码
  • ¥15 牛顿斯科特系数表表示
  • ¥15 arduino 步进电机
  • ¥20 程序进入HardFault_Handler
  • ¥15 oracle集群安装出bug
  • ¥15 关于#python#的问题:自动化测试