Source code for rhoknp.units.clause
import logging
from functools import cached_property
from typing import TYPE_CHECKING, Optional
try:
from typing import override # type: ignore[attr-defined]
except ImportError:
from typing_extensions import override
from rhoknp.cohesion.discourse import DiscourseRelation
from rhoknp.units.base_phrase import BasePhrase
from rhoknp.units.morpheme import Morpheme
from rhoknp.units.phrase import Phrase
from rhoknp.units.unit import Unit
if TYPE_CHECKING:
from rhoknp.units.document import Document
from rhoknp.units.sentence import Sentence
logger = logging.getLogger(__name__)
[docs]
class Clause(Unit):
"""節クラス."""
count = 0
def __init__(self) -> None:
super().__init__()
# parent unit
self._sentence: "Sentence" | None = None
# child units
self._phrases: list[Phrase] | None = None
self.discourse_relations: list[DiscourseRelation] = [] #: 談話関係のリスト.
self.index = self.count #: 文内におけるインデックス.
Clause.count += 1
@override
def __post_init__(self) -> None:
super().__post_init__()
# Find discourse relations.
for key in self.end.features:
if key.startswith("節-機能"):
relation = DiscourseRelation.from_clause_function_fstring(key, modifier=self)
if relation is not None:
if relation not in relation.modifier.discourse_relations:
relation.modifier.discourse_relations.append(relation)
for base_phrase in self.base_phrases:
for key in base_phrase.features:
if key.startswith("節-前向き機能"):
if base_phrase.parent is None or base_phrase.parent in self.base_phrases:
head = self
else:
head = base_phrase.parent.clause
relation = DiscourseRelation.from_backward_clause_function_fstring(key, head=head)
if relation is not None:
if relation not in relation.modifier.discourse_relations:
relation.modifier.discourse_relations.append(relation)
values = self.end.features.get("談話関係")
if values:
assert isinstance(values, str)
for value in values.split(";"):
relation = DiscourseRelation.from_discourse_relation_fstring(value, modifier=self)
if relation is not None:
if relation not in relation.modifier.discourse_relations:
relation.modifier.discourse_relations.append(relation)
@override
def __hash__(self) -> int:
return hash((self.parent_unit, self.index))
@override
def __eq__(self, other: object) -> bool:
if not isinstance(other, type(self)):
return False
if self.parent_unit != other.parent_unit:
return False
return self.index == other.index
@cached_property
def global_index(self) -> int:
"""文書全体におけるインデックス."""
if not self.sentence.has_document():
return self.index
if self.sentence.index == 0:
return self.index
if self.index > 0:
return self.sentence.clauses[0].global_index + self.index
prev_sentence = self.document.sentences[self.sentence.index - 1]
return prev_sentence.clauses[0].global_index + len(prev_sentence.clauses)
@property
def parent_unit(self) -> Optional["Sentence"]:
"""上位の言語単位(文).未登録なら None."""
return self._sentence
@property
def child_units(self) -> list[Phrase] | None:
"""下位の言語単位(文節).解析結果にアクセスできないなら None."""
return self._phrases
@property
def document(self) -> "Document":
"""文書.
Raises:
AttributeError: 解析結果にアクセスできない場合.
"""
return self.sentence.document
@property
def sentence(self) -> "Sentence":
"""文."""
assert self._sentence is not None
return self._sentence
@sentence.setter
def sentence(self, sentence: "Sentence") -> None:
"""文.
Args:
sentence: 文.
"""
self._sentence = sentence
@property
def phrases(self) -> list[Phrase]:
"""文節のリスト."""
assert self._phrases is not None
return self._phrases
@phrases.setter
def phrases(self, phrases: list[Phrase]) -> None:
"""文節のリスト.
Args:
phrases: 文節のリスト.
"""
for phrase in phrases:
phrase.clause = self
self._phrases = phrases
@property
def base_phrases(self) -> list[BasePhrase]:
"""基本句のリスト."""
return [base_phrase for phrase in self.phrases for base_phrase in phrase.base_phrases]
@property
def morphemes(self) -> list[Morpheme]:
"""形態素のリスト."""
return [morpheme for base_phrase in self.base_phrases for morpheme in base_phrase.morphemes]
@cached_property
def head(self) -> BasePhrase:
"""節主辞の基本句."""
heads: list[BasePhrase] = []
for base_phrase in self.base_phrases:
if "節-主辞" in base_phrase.features:
heads.append(base_phrase)
if len(heads) == 1:
return heads[0]
elif len(heads) > 1:
logger.warning("found multiple heads in a clause; use the last base phrase as the head")
return heads[-1]
else:
logger.warning("found no head in a clause; use the last base phrase as the head")
return self.base_phrases[-1]
@property
def end(self) -> BasePhrase:
"""節区切の基本句."""
return self.base_phrases[-1]
@cached_property
def parent(self) -> Optional["Clause"]:
"""係り先の節.ないなら None."""
head_parent = self.head.parent
while head_parent in self.base_phrases:
head_parent = head_parent.parent
for clause in self.sentence.clauses:
if head_parent in clause.base_phrases:
return clause
return None
@cached_property
def children(self) -> list["Clause"]:
"""この節に係っている節のリスト."""
return [clause for clause in self.sentence.clauses if clause.parent == self]
[docs]
def is_adnominal(self) -> bool:
"""連体修飾節なら True."""
return self.end.features.get("節-区切", "") == "連体修飾"
[docs]
def is_sentential_complement(self) -> bool:
"""補文節なら True."""
return self.end.features.get("節-区切", "") == "補文"
[docs]
@classmethod
def from_knp(cls, knp_text: str) -> "Clause":
"""節クラスのインスタンスを KNP の解析結果から初期化.
Args:
knp_text: KNP の解析結果.
"""
clause = cls()
phrases = []
phrase_lines: list[str] = []
for line in knp_text.split("\n"):
if not line.strip():
continue
if Phrase.is_phrase_line(line) and phrase_lines:
phrases.append(Phrase.from_knp("\n".join(phrase_lines)))
phrase_lines = []
phrase_lines.append(line)
phrase = Phrase.from_knp("\n".join(phrase_lines))
phrases.append(phrase)
clause.phrases = phrases
return clause
[docs]
def to_knp(self) -> str:
"""KNP フォーマットに変換."""
return "".join(phrase.to_knp() for phrase in self.phrases)