[docs]@overridedefapply_to_document(self,document:Union[Document,str],timeout:int=10)->Document:"""文書に KNP を適用する. Args: document: 文書. timeout: 最大処理時間. .. note:: 文分割がまだなら,先に初期化時に設定した senter で文分割する. 未設定なら RegexSenter で文分割する. 形態素解析がまだなら,先に初期化時に設定した jumanpp で形態素解析する. 未設定なら Jumanpp (オプションなし)で形態素解析する. """ifnotself.is_available():raiseRuntimeError("KNP is not available.")start:float=time.time()ifisinstance(document,str):document=Document(document)doc_id=document.doc_idifdocument.is_senter_required():ifself.senterisNone:logger.debug("senter is not specified; use RegexSenter")self.senter=RegexSenter()document=self.senter.apply_to_document(document,timeout=timeout-int(time.time()-start))sentences:list[Sentence]=[]forsentenceindocument.sentences:sentences.append(self.apply_to_sentence(sentence,timeout=timeout-int(time.time()-start)))ret=Document.from_sentences(sentences)ifdoc_id!="":ret.doc_id=doc_idforsentenceinret.sentences:sentence.doc_id=doc_idreturnret
@override
def apply_to_sentence(self, sentence: Union[Sentence, str], timeout: int = 10) -> Sentence:
    """Apply KNP to a sentence.

    Args:
        sentence: Sentence to analyze; a plain string is wrapped in a Sentence first.
        timeout: Maximum processing time (reported in seconds by the timeout error).

    Returns:
        The Sentence parsed from KNP's standard output.

    Raises:
        RuntimeError: If KNP is not available, if the KNP process is no longer available
            after the request, or if KNP returns an empty result for a non-empty input.
        TimeoutError: If the worker thread has not finished within ``timeout``.

    .. note::
        If morphological analysis has not been done yet, it is first performed with the
        jumanpp configured at initialization; when none is set, Jumanpp (no options) is used.
    """
    if self.is_available() is False:
        raise RuntimeError("KNP is not available.")

    start: float = time.time()

    if isinstance(sentence, str):
        sentence = Sentence(sentence)

    if sentence.is_jumanpp_required():
        with self._lock:
            # Lazily create a default Jumanpp and keep it on the instance for later calls.
            if self.jumanpp is None:
                logger.debug("jumanpp is not specified when initializing KNP: use Jumanpp with no option")
                self.jumanpp = Jumanpp()
            # Hand over only the time budget left (elapsed time truncated to whole seconds).
            sentence = self.jumanpp.apply_to_sentence(sentence, timeout=timeout - int(time.time() - start))

    # Filled in by worker() through ``nonlocal``; read after the thread has been joined.
    stdout_text: str = ""

    def worker() -> None:
        """Send the sentence to the KNP process and collect its output up to the EOS line."""
        nonlocal stdout_text
        assert self._proc is not None
        assert self._proc.stdin is not None
        assert self._proc.stdout is not None
        assert self._proc.stderr is not None
        # Pick the serialization based on whether the sentence still requires KNP analysis.
        if sentence.is_knp_required():
            self._proc.stdin.write(sentence.to_jumanpp())
        else:
            self._proc.stdin.write(sentence.to_knp())
        self._proc.stdin.flush()
        stdout_text = ""
        # Read line by line until the EOS marker, or until the process stops being available.
        while self.is_available():
            line = self._proc.stdout.readline()
            stdout_text += line
            if line.strip() == Sentence.EOS:
                break

        # Non-blocking read from stderr: select() with a zero timeout only reports data
        # that is already readable, so this loop does not wait for more output.
        stderr_text = ""
        while self._proc.stderr in select.select([self._proc.stderr], [], [], 0)[0]:
            line = self._proc.stderr.readline()
            if line.strip() == "":
                break
            stderr_text += line
        if stderr_text.strip() != "":
            logger.debug(stderr_text.strip())

    with self._lock:
        # Run the exchange in a daemon thread so join() can bound the wait.
        thread = threading.Thread(target=worker, daemon=True)
        thread.start()
        # NOTE(review): join() gets the full ``timeout``; time already spent on Juman++
        # above is not subtracted here. Confirm this is intended.
        thread.join(timeout)

        if thread.is_alive():
            # The worker is still running: start the process again before reporting the timeout.
            self.start_process(skip_sanity_check=True)
            raise TimeoutError(f"Operation timed out after {timeout} seconds.")

        if not self.is_available():
            # The process is no longer available: start it again before raising.
            self.start_process(skip_sanity_check=True)
            raise RuntimeError("KNP exited unexpectedly.")

        ret = Sentence.from_knp(stdout_text)
        # A non-empty input that yields an empty parsed text is treated as a failure.
        if sentence.text and not ret.text:
            raise RuntimeError(f"KNP returned empty result for input: '{sentence.text}'")
        return ret
def get_version(self) -> str:
    """Return the version string reported by KNP.

    Runs ``self.version_command`` as a subprocess and returns what it writes to
    standard error, with surrounding whitespace stripped.

    Returns:
        The stripped standard-error output of the version command.

    Raises:
        RuntimeError: If KNP is not available.
        subprocess.CalledProcessError: If the version command exits with a non-zero status.
    """
    if not self.is_available():
        raise RuntimeError("KNP is not available.")
    completed = subprocess.run(self.version_command, capture_output=True, encoding="utf-8", check=True)
    # The version text is taken from stderr, not stdout.
    return completed.stderr.strip()