dongyi1939 2015-10-27 19:49
浏览 67
已采纳

PHP Pthread似乎有随机内存管理和内存泄漏[关闭]

So, I have hit a dead end here. I have tried everything I know to isolate a memory leak, and from what I've gathered, it seems to be related to the use of pthreads for multithreading this script.

I am in the process of writing a bot for Wikipedia, and I am nearing completion. Functionally, the program is sound, and works as expected in both multithreading and single-threading. The memory leak only occurs when multi-threading is switched on.

Both versions use exactly the same functions on the same script/file, to allow for easy/consistent debugging.

The engine that threads is below.

//Multithread engine

//This thread class allows for asynchronous function calls.  This is useful for functions that consume time and can run in the background.
//Caution must be exercised to ensure that the functions are thread-safe.
/**
 * Thread subclass that invokes an arbitrary callable and keeps its return value.
 *
 * Use AsyncFunctionCall::call() to create and start an instance in one step;
 * the callable's return value is available in $result once run() has finished.
 */
class AsyncFunctionCall extends Thread {

    /** @var callable Callable invoked by run(). */
    protected $method;
    /** @var array Positional arguments passed to the callable. */
    protected $params;
    /** @var mixed Return value of the callable; null until run() stores it. */
    public $result;

    /**
     * @param callable $method Callable to invoke from run().
     * @param array    $params Positional arguments for $method.
     */
    public function __construct( $method, $params ) {
        $this->method = $method;
        $this->params = $params;
        $this->result = null;
    }

    /**
     * Invokes the stored callable and records its return value in $result.
     *
     * @return bool True when the callable returned a truthy value, false otherwise.
     */
    public function run() {
        $outcome = call_user_func_array( $this->method, $this->params );
        $this->result = $outcome;
        return (bool) $outcome;
    }

    /**
     * Creates a thread for the given callable and starts it immediately.
     *
     * @param callable $method Callable to invoke in the new thread.
     * @param array    $params Positional arguments for $method.
     * @return AsyncFunctionCall|false The started thread, or false (after printing
     *                                 a message) when start() reports failure.
     */
    public static function call($method, $params){
        $thread = new self($method, $params);
        if( $thread->start() ) {
            return $thread;
        }

        // $method may be an array callable or a Closure; interpolating it directly
        // would print "Array" (plus a notice) or fail on the string conversion.
        // is_callable() in syntax-only mode yields a printable name instead.
        $callableName = '';
        is_callable( $method, true, $callableName );
        echo "Unable to initiate background function $callableName!
";
        return false;
    }
}

// Analyze multiple pages simultaneously and edit them.
/**
 * Pool task that runs analyzePage() for a single page.
 *
 * The constructor stores the page identity and the configuration values; run()
 * forwards them to analyzePage(), keeps the returned value in $result, marks
 * the task as garbage and then clears the stored inputs.
 */
class ThreadedBot extends Collectable {

    protected $page,
        $pageid,
        $alreadyArchived,
        $ARCHIVE_ALIVE,
        $TAG_OVERRIDE,
        $ARCHIVE_BY_ACCESSDATE,
        $TOUCH_ARCHIVE,
        $DEAD_ONLY,
        $NOTIFY_ERROR_ON_TALK,
        $NOTIFY_ON_TALK,
        $TALK_MESSAGE_HEADER,
        $TALK_MESSAGE,
        $TALK_ERROR_MESSAGE_HEADER,
        $TALK_ERROR_MESSAGE,
        $DEADLINK_TAGS,
        $CITATION_TAGS,
        $IGNORE_TAGS,
        $ARCHIVE_TAGS,
        $VERIFY_DEAD,
        $LINK_SCAN;

    /** @var mixed Value returned by analyzePage(); assigned in run(). */
    public $result;

    /**
     * Stores every argument on the instance so run() can pass it on to analyzePage().
     */
    public function __construct(
        $page,
        $pageid,
        $alreadyArchived,
        $ARCHIVE_ALIVE,
        $TAG_OVERRIDE,
        $ARCHIVE_BY_ACCESSDATE,
        $TOUCH_ARCHIVE,
        $DEAD_ONLY,
        $NOTIFY_ERROR_ON_TALK,
        $NOTIFY_ON_TALK,
        $TALK_MESSAGE_HEADER,
        $TALK_MESSAGE,
        $TALK_ERROR_MESSAGE_HEADER,
        $TALK_ERROR_MESSAGE,
        $DEADLINK_TAGS,
        $CITATION_TAGS,
        $IGNORE_TAGS,
        $ARCHIVE_TAGS,
        $VERIFY_DEAD,
        $LINK_SCAN
    ) {
        $this->page = $page;
        $this->pageid = $pageid;
        $this->alreadyArchived = $alreadyArchived;
        $this->ARCHIVE_ALIVE = $ARCHIVE_ALIVE;
        $this->TAG_OVERRIDE = $TAG_OVERRIDE;
        $this->ARCHIVE_BY_ACCESSDATE = $ARCHIVE_BY_ACCESSDATE;
        $this->TOUCH_ARCHIVE = $TOUCH_ARCHIVE;
        $this->DEAD_ONLY = $DEAD_ONLY;
        $this->NOTIFY_ERROR_ON_TALK = $NOTIFY_ERROR_ON_TALK;
        $this->NOTIFY_ON_TALK = $NOTIFY_ON_TALK;
        $this->TALK_MESSAGE_HEADER = $TALK_MESSAGE_HEADER;
        $this->TALK_MESSAGE = $TALK_MESSAGE;
        $this->TALK_ERROR_MESSAGE_HEADER = $TALK_ERROR_MESSAGE_HEADER;
        $this->TALK_ERROR_MESSAGE = $TALK_ERROR_MESSAGE;
        $this->DEADLINK_TAGS = $DEADLINK_TAGS;
        $this->CITATION_TAGS = $CITATION_TAGS;
        $this->IGNORE_TAGS = $IGNORE_TAGS;
        $this->ARCHIVE_TAGS = $ARCHIVE_TAGS;
        $this->VERIFY_DEAD = $VERIFY_DEAD;
        $this->LINK_SCAN = $LINK_SCAN;
    }

    /**
     * Raises the memory limit, prints the limit and current usage, analyzes the
     * page, then flags the task as garbage and releases the stored inputs.
     */
    public function run() {
        ini_set( 'memory_limit', '1G' );
        $limit = ini_get( 'memory_limit' );
        $usedMegabytes = memory_get_usage( true ) / 1024 / 1024;
        echo $limit."; ".$usedMegabytes." MB
";

        $this->result = analyzePage(
            $this->page,
            $this->pageid,
            $this->alreadyArchived,
            $this->ARCHIVE_ALIVE,
            $this->TAG_OVERRIDE,
            $this->ARCHIVE_BY_ACCESSDATE,
            $this->TOUCH_ARCHIVE,
            $this->DEAD_ONLY,
            $this->NOTIFY_ERROR_ON_TALK,
            $this->NOTIFY_ON_TALK,
            $this->TALK_MESSAGE_HEADER,
            $this->TALK_MESSAGE,
            $this->TALK_ERROR_MESSAGE_HEADER,
            $this->TALK_ERROR_MESSAGE,
            $this->DEADLINK_TAGS,
            $this->CITATION_TAGS,
            $this->IGNORE_TAGS,
            $this->ARCHIVE_TAGS,
            $this->VERIFY_DEAD,
            $this->LINK_SCAN
        );
        $this->setGarbage();

        // Inputs are no longer needed once the result is stored: null every
        // one first, then unset every one, in declaration order.
        $inputNames = array(
            'page', 'pageid', 'alreadyArchived', 'ARCHIVE_ALIVE', 'TAG_OVERRIDE',
            'ARCHIVE_BY_ACCESSDATE', 'TOUCH_ARCHIVE', 'DEAD_ONLY',
            'NOTIFY_ERROR_ON_TALK', 'NOTIFY_ON_TALK', 'TALK_MESSAGE_HEADER',
            'TALK_MESSAGE', 'TALK_ERROR_MESSAGE_HEADER', 'TALK_ERROR_MESSAGE',
            'DEADLINK_TAGS', 'CITATION_TAGS', 'IGNORE_TAGS', 'ARCHIVE_TAGS',
            'VERIFY_DEAD', 'LINK_SCAN',
        );
        foreach( $inputNames as $name ) {
            $this->$name = null;
        }
        foreach( $inputNames as $name ) {
            unset( $this->$name );
        }
    }
}

This block here in the body of the program calls the threading engine.

// Analyze every entry of $pages, either inline (WORKERS === false) or through a
// worker pool, and fold each page's stats into the running totals. The stats
// array is read with the keys: pagemodified, linksanalyzed, linksarchived,
// linksrescued, linkstagged, newlyArchived, archiveProblems, errors.
if( WORKERS === false ) {
    foreach( $pages as $tid => $tpage ) {
        $pagesAnalyzed++;
        $stats = analyzePage( $tpage['title'], $tpage['pageid'], $alreadyArchived, $ARCHIVE_ALIVE, $TAG_OVERRIDE, $ARCHIVE_BY_ACCESSDATE, $TOUCH_ARCHIVE, $DEAD_ONLY, $NOTIFY_ERROR_ON_TALK, $NOTIFY_ON_TALK, $TALK_MESSAGE_HEADER, $TALK_MESSAGE, $TALK_ERROR_MESSAGE_HEADER, $TALK_ERROR_MESSAGE, $DEADLINK_TAGS, $CITATION_TAGS, $IGNORE_TAGS, $ARCHIVE_TAGS, $VERIFY_DEAD, $LINK_SCAN );
        if( $stats['pagemodified'] === true ) $pagesModified++;
        $linksAnalyzed += $stats['linksanalyzed'];
        $linksArchived += $stats['linksarchived'];
        $linksFixed += $stats['linksrescued'];
        $linksTagged += $stats['linkstagged'];
        $alreadyArchived = array_merge( $stats['newlyArchived'], $alreadyArchived );
        $failedToArchive = array_merge( $failedToArchive, $stats['archiveProblems'] );
        $allerrors = array_merge( $allerrors, $stats['errors'] );
        // Persist after every page so progress is not lost between pages.
        file_put_contents( $dlaaLocation, serialize( $alreadyArchived ) );
    }
} else {
    $workerQueue = new Pool( $workerLimit );
    foreach( $pages as $tid => $tpage ) {
        $pagesAnalyzed++;
        echo "Submitted {$tpage['title']}, job ".($tid+1)." for analyzing...
";
        // Each task receives the value $alreadyArchived has at submission time.
        $workerQueue->submit( new ThreadedBot( $tpage['title'], $tpage['pageid'], $alreadyArchived, $ARCHIVE_ALIVE, $TAG_OVERRIDE, $ARCHIVE_BY_ACCESSDATE, $TOUCH_ARCHIVE, $DEAD_ONLY, $NOTIFY_ERROR_ON_TALK, $NOTIFY_ON_TALK, $TALK_MESSAGE_HEADER, $TALK_MESSAGE, $TALK_ERROR_MESSAGE_HEADER, $TALK_ERROR_MESSAGE, $DEADLINK_TAGS, $CITATION_TAGS, $IGNORE_TAGS, $ARCHIVE_TAGS, $VERIFY_DEAD, $LINK_SCAN ) );
    }
    $workerQueue->shutdown();
    // The totals are captured by reference rather than via `global`, so the
    // collector updates the right variables whatever scope this code runs in.
    $workerQueue->collect(
        function( $thread ) use ( &$pagesModified, &$linksAnalyzed, &$linksArchived, &$linksFixed, &$linksTagged, &$alreadyArchived, &$failedToArchive, &$allerrors ) {
            $stats = $thread->result;
            if( !is_array( $stats ) ) {
                // The task never stored a result (e.g. its worker died on a
                // fatal error). Merging it would hand null to array_merge(),
                // which returns NULL and would wipe the accumulated lists.
                return $thread->isGarbage();
            }
            if( $stats['pagemodified'] === true ) $pagesModified++;
            $linksAnalyzed += $stats['linksanalyzed'];
            $linksArchived += $stats['linksarchived'];
            $linksFixed += $stats['linksrescued'];
            $linksTagged += $stats['linkstagged'];
            $alreadyArchived = array_merge( $stats['newlyArchived'], $alreadyArchived );
            $failedToArchive = array_merge( $failedToArchive, $stats['archiveProblems'] );
            $allerrors = array_merge( $allerrors, $stats['errors'] );
            return $thread->isGarbage();
        }
    );
    echo "!!!!!!!!!!!!!!Links analyzed so far: $linksAnalyzed

";
    file_put_contents( $dlaaLocation, serialize( $alreadyArchived ) );
}

As you can see above, the if statement decides whether to run multi-threaded or single-threaded. Some notes: $workerLimit = 20; all resources initialized in functions are closed, nullified, and unset; there is no memory leak as a result of function calls; memory_limit has been confirmed to be at 1G; workers will eventually crash with an OOM fatal error; the memory allocation seems to be randomly distributed among the workers; each worker gradually uses more and more memory; the script itself goes to 700 MB according to Task Manager before crashing; and finally, the more workers I add, the faster each worker crashes, with 100 workers causing an immediate crash.

Here's a segment of the output.

Analyzed Stanley Hartt (8742961)
Rescued: 0; Tagged dead: 0; Archived: 0; Max System Memory Used: 1.25 MB

PHP Fatal error:  Out of memory (allocated 46661632) (tried to allocate 6557907 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259

Fatal error: Out of memory (allocated 46661632) (tried to allocate 6557907 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259
Analyzed High-explosive anti-tank warhead (255968)
Rescued: 0; Tagged dead: 0; Archived: 5; Max System Memory Used: 22.75 MB

PHP Fatal error:  Out of memory (allocated 14680064) (tried to allocate 6341940 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1261

Fatal error: Out of memory (allocated 14680064) (tried to allocate 6341940 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1261
PHP Fatal error:  Out of memory (allocated 6291456) (tried to allocate 5243257 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259

Fatal error: Out of memory (allocated 6291456) (tried to allocate 5243257 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259
PHP Fatal error:  Out of memory (allocated 7864320) (tried to allocate 5245685 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259

Fatal error: Out of memory (allocated 7864320) (tried to allocate 5245685 bytes) in C:\Users\Maximilian Doerr\Documents\GitHub\Cyberbot_II\deadlink.php on line 1259
Analyzed Nadezhda Tylik (2896780)
Rescued: 0; Tagged dead: 0; Archived: 5; Max System Memory Used: 2.75 MB

This is my first time multithreading, so I'm new to this and would appreciate any help and suggestions. If you have more questions, just ask. :-)

  • 写回答

1条回答 默认 最新

  • dsms21398 2017-06-17 18:39
    关注

    So it turns out this wasn't coming from pthreads. Instead, multithreading just made the issue more visible. It turns out I was using multicurl, and because I was using the wrong function to close the handles, the memory wasn't being freed even though the handles were being closed.

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

悬赏问题

  • ¥15 飞机曲面部件如机翼,壁板等具体的孔位模型
  • ¥15 vs2019中数据导出问题
  • ¥20 云服务Linux系统TCP-MSS值修改?
  • ¥20 关于#单片机#的问题:项目:使用模拟iic与ov2640通讯环境:F407问题:读取的ID号总是0xff,自己调了调发现在读从机数据时,SDA线上并未有信号变化(语言-c语言)
  • ¥20 怎么在stm32门禁成品上增加查询记录功能
  • ¥15 Source insight编写代码后使用CCS5.2版本import之后,代码跳到注释行里面
  • ¥50 NT4.0系统 STOP:0X0000007B
  • ¥15 想问一下stata17中这段代码哪里有问题呀
  • ¥15 flink cdc无法实时同步mysql数据
  • ¥100 有人会搭建GPT-J-6B框架吗?有偿