@override
def apply_to_document(self, document: Union[Document, str], timeout: int = 10) -> Document:
    """Apply Juman++ to a document.

    Args:
        document: Document to analyze. A plain string is wrapped in a
            ``Document`` first.
        timeout: Maximum processing time in seconds. The budget is shared by
            sentence splitting and all per-sentence analyses: each step
            receives what is left of it.

    Returns:
        A new document built from the analyzed sentences. When the input
        document has a non-empty ``doc_id``, it is copied to the result and
        to each of its sentences.

    Raises:
        RuntimeError: If Juman++ is not available.

    .. note::
        If the document has not been split into sentences yet, it is first
        split with the senter configured at initialization. When no senter is
        configured, a ``RegexSenter`` is created, stored on ``self.senter``
        and used instead.
    """
    if not self.is_available():
        raise RuntimeError("Juman++ is not available.")

    # Use a monotonic clock: wall-clock time can jump (NTP, manual changes),
    # which would silently shrink or inflate the remaining budget.
    started_at = time.monotonic()

    def remaining() -> int:
        """Return what is left of ``timeout`` after the whole seconds elapsed so far."""
        return timeout - int(time.monotonic() - started_at)

    if isinstance(document, str):
        document = Document(document)
    doc_id = document.doc_id

    if document.is_senter_required():
        if self.senter is None:
            logger.debug("senter is not specified; use RegexSenter")
            self.senter = RegexSenter()
        document = self.senter.apply_to_document(document, timeout=remaining())

    # ``remaining()`` is re-evaluated for every sentence, so later sentences
    # only get the time earlier ones have not already consumed.
    sentences: list[Sentence] = [
        self.apply_to_sentence(sentence, timeout=remaining()) for sentence in document.sentences
    ]

    ret = Document.from_sentences(sentences)
    if doc_id != "":
        # Carry the original document ID over to the rebuilt document.
        ret.doc_id = doc_id
        for sentence in ret.sentences:
            sentence.doc_id = doc_id
    return ret
@override
def apply_to_sentence(self, sentence: Union[Sentence, str], timeout: int = 10) -> Sentence:
    """Apply Juman++ to a single sentence.

    The sentence is written to the running Juman++ process from a worker
    thread, which then reads the process output up to the end-of-sentence
    marker (``Sentence.EOS``). The calling thread waits for the worker for at
    most ``timeout`` seconds while holding ``self._lock``.

    Args:
        sentence: Sentence to analyze. A plain string is wrapped in a
            ``Sentence`` first.
        timeout: Maximum time in seconds to wait for the result.

    Returns:
        The sentence parsed from the Juman++ output.

    Raises:
        RuntimeError: If Juman++ is not available, is no longer available
            after the request, or returns an empty result for a non-empty
            input.
        TimeoutError: If the worker has not finished within ``timeout``
            seconds. The process is restarted before the error is raised.
    """
    if not self.is_available():
        raise RuntimeError("Juman++ is not available.")

    if isinstance(sentence, str):
        sentence = Sentence(sentence)

    stdout_text: str = ""

    def worker() -> None:
        nonlocal stdout_text
        # Pin the process this request talks to. After a timeout the caller
        # restarts the process and rebinds ``self._proc``; an abandoned worker
        # that kept re-reading ``self._proc`` would start consuming the output
        # of the *new* process and corrupt the next request.
        proc = self._proc
        assert proc is not None
        assert proc.stdin is not None
        assert proc.stdout is not None
        assert proc.stderr is not None
        proc.stdin.write(sentence.to_raw_text())
        proc.stdin.flush()
        stdout_text = ""
        while self.is_available():
            line = proc.stdout.readline()
            if line == "":
                # readline() returns an empty string only at EOF: the pinned
                # process closed its stdout, so nothing more will arrive.
                break
            stdout_text += line
            if line.strip() == Sentence.EOS:
                break
        # Non-blocking read from stderr: a zero timeout makes select() only
        # report data that is already waiting.
        stderr_text: str = ""
        while proc.stderr in select.select([proc.stderr], [], [], 0)[0]:
            line = proc.stderr.readline()
            if line.strip() == "":
                break
            stderr_text += line
        if stderr_text.strip() != "":
            logger.debug(stderr_text.strip())

    with self._lock:
        # Daemon thread: a worker stuck on a blocked read must not keep the
        # interpreter alive.
        thread = threading.Thread(target=worker, daemon=True)
        thread.start()
        thread.join(timeout)

        if thread.is_alive():
            # The worker is still waiting on the process; restart it so the
            # next request starts from a clean state.
            self.start_process(skip_sanity_check=True)
            raise TimeoutError(f"Operation timed out after {timeout} seconds.")

        if not self.is_available():
            self.start_process(skip_sanity_check=True)
            raise RuntimeError("Juman++ exited unexpectedly.")

        ret = Sentence.from_jumanpp(stdout_text)
        if sentence.text and not ret.text:
            raise RuntimeError(f"Juman++ returned empty result for input: '{sentence.text}'")
        return ret
def get_version(self) -> str:
    """Return the version string reported by Juman++.

    Returns:
        The standard output of ``self.version_command`` with surrounding
        whitespace stripped.

    Raises:
        RuntimeError: If Juman++ is not available.
        subprocess.CalledProcessError: If the version command exits with a
            non-zero status.
    """
    if self.is_available():
        completed = subprocess.run(
            self.version_command,
            check=True,
            encoding="utf-8",
            capture_output=True,
        )
        version_text: str = completed.stdout
        return version_text.strip()
    raise RuntimeError("Juman++ is not available.")