LDA 中用 pyLDAvis 可视化时出现 'ascii' codec can't encode characters in position 18-19: ordinal not in range(128) 错误?
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-7-ca6914ad3a98> in <module>
----> 1 visibel(2,20302)
<ipython-input-2-0255fd910b80> in visibel(topic_num, data_num)
86 model_name = './lda_{}_{}.model'.format(topic_num, data_num)
87 lda = models.ldamodel.LdaModel.load(model_name)
---> 88 vis_data = pyLDAvis.gensim.prepare(lda, corpus, dictionary)
89 pyLDAvis.show(vis_data)
90
G:\Python38\lib\site-packages\pyLDAvis\gensim.py in prepare(topic_model, corpus, dictionary, doc_topic_dist, **kwargs)
117 """
118 opts = fp.merge(_extract_data(topic_model, corpus, dictionary, doc_topic_dist), kwargs)
--> 119 return vis_prepare(**opts)
G:\Python38\lib\site-packages\pyLDAvis\_prepare.py in prepare(topic_term_dists, doc_topic_dists, doc_lengths, vocab, term_frequency, R, lambda_step, mds, n_jobs, plot_opts, sort_topics)
396 term_frequency = np.sum(term_topic_freq, axis=0)
397
--> 398 topic_info = _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs)
399 token_table = _token_table(topic_info, term_topic_freq, vocab, term_frequency)
400 topic_coordinates = _topic_coordinates(mds, topic_term_dists, topic_proportion)
G:\Python38\lib\site-packages\pyLDAvis\_prepare.py in _topic_info(topic_term_dists, topic_proportion, term_frequency, term_topic_freq, vocab, lambda_step, R, n_jobs)
252 'Category': 'Topic%d' % new_topic_id})
253
--> 254 top_terms = pd.concat(Parallel(n_jobs=n_jobs)(delayed(_find_relevance_chunks)(log_ttd, log_lift, R, ls) \
255 for ls in _job_chunks(lambda_seq, n_jobs)))
256 topic_dfs = map(topic_top_term_df, enumerate(top_terms.T.iterrows(), 1))
G:\Python38\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
971
972 if not self._managed_backend:
--> 973 n_jobs = self._initialize_backend()
974 else:
975 n_jobs = self._effective_n_jobs()
G:\Python38\lib\site-packages\joblib\parallel.py in _initialize_backend(self)
738 """Build a process or thread pool and return the number of workers"""
739 try:
--> 740 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
741 **self._backend_args)
742 if self.timeout is not None and not self._backend.supports_timeout:
G:\Python38\lib\site-packages\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, prefer, require, idle_worker_timeout, **memmappingexecutor_args)
492 SequentialBackend(nesting_level=self.nesting_level))
493
--> 494 self._workers = get_memmapping_executor(
495 n_jobs, timeout=idle_worker_timeout,
496 env=self._prepare_worker_env(n_jobs=n_jobs),
G:\Python38\lib\site-packages\joblib\executor.py in get_memmapping_executor(n_jobs, **kwargs)
18
19 def get_memmapping_executor(n_jobs, **kwargs):
---> 20 return MemmappingExecutor.get_memmapping_executor(n_jobs, **kwargs)
21
22
G:\Python38\lib\site-packages\joblib\executor.py in get_memmapping_executor(cls, n_jobs, timeout, initializer, initargs, env, temp_folder, context_id, **backend_args)
40 _executor_args = executor_args
41
---> 42 manager = TemporaryResourcesManager(temp_folder)
43
44 # reducers access the temporary folder in which to store temporary
G:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in __init__(self, temp_folder_root, context_id)
529 # exposes exposes too many low-level details.
530 context_id = uuid4().hex
--> 531 self.set_current_context(context_id)
532
533 def set_current_context(self, context_id):
G:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in set_current_context(self, context_id)
533 def set_current_context(self, context_id):
534 self._current_context_id = context_id
--> 535 self.register_new_context(context_id)
536
537 def register_new_context(self, context_id):
G:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in register_new_context(self, context_id)
558 new_folder_name, self._temp_folder_root
559 )
--> 560 self.register_folder_finalizer(new_folder_path, context_id)
561 self._cached_temp_folders[context_id] = new_folder_path
562
G:\Python38\lib\site-packages\joblib\_memmapping_reducer.py in register_folder_finalizer(self, pool_subfolder, context_id)
588 # semaphores and pipes
589 pool_module_name = whichmodule(delete_folder, 'delete_folder')
--> 590 resource_tracker.register(pool_subfolder, "folder")
591
592 def _cleanup():
G:\Python38\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in register(self, name, rtype)
189 '''Register a named resource, and increment its refcount.'''
190 self.ensure_running()
--> 191 self._send('REGISTER', name, rtype)
192
193 def unregister(self, name, rtype):
G:\Python38\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py in _send(self, cmd, name, rtype)
202
203 def _send(self, cmd, name, rtype):
--> 204 msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii')
205 if len(name) > 512:
206 # posix guarantees that writes to a pipe of less than PIPE_BUF
UnicodeEncodeError: 'ascii' codec can't encode characters in position 18-19: ordinal not in range(128)