Source code for rhoknp.cohesion.discourse

import logging
import re
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, ClassVar, Optional

if TYPE_CHECKING:
    from rhoknp import Clause, Sentence

# Module-level logger; used below to warn about discourse relation feature strings that cannot be resolved.
logger = logging.getLogger(__name__)


class DiscourseRelationLabel(Enum):
    """Enumeration of discourse relation labels.

    Each member's value is the Japanese label string.
    """

    NO_RELATION = "談話関係なし"  # no discourse relation
    CAUSE_REASON = "原因・理由"  # cause / reason
    PURPOSE = "目的"  # purpose
    CONDITION = "条件"  # condition
    EVIDENCE = "根拠"  # evidence
    CONTRAST = "対比"  # contrast
    CONCESSION = "逆接"  # concession
class DiscourseRelationTag(Enum):
    """Enumeration of discourse relation tags.

    Each member's value is the Japanese tag string. Several tags (some of
    which encode a direction) map onto one ``DiscourseRelationLabel``; the
    mapping is exposed through the ``label`` property.
    """

    NO_RELATION = "談話関係なし"  # no discourse relation
    CAUSE_REASON = "原因・理由"  # cause / reason
    CAUSE_REASON_FORWARD = "原因・理由(順方向)"  # cause / reason (forward)
    CAUSE_REASON_BACKWARD = "原因・理由(逆方向)"  # cause / reason (backward)
    CAUSE_REASON_BACKWARD2 = "原因・理由-逆"  # cause / reason, reversed (alternative spelling)
    PURPOSE = "目的"  # purpose
    PURPOSE_FORWARD = "目的(順方向)"  # purpose (forward)
    PURPOSE_BACKWARD = "目的(逆方向)"  # purpose (backward)
    CONDITION = "条件"  # condition
    CONDITION_FORWARD = "条件(順方向)"  # condition (forward)
    CONDITION_BACKWARD = "条件(逆方向)"  # condition (backward)
    NEGATIVE_CONDITION = "否定条件"  # negative condition
    CONTRAST = "対比"  # contrast
    CONTRAST_NO_DIRECTION = "対比(方向なし)"  # contrast (no direction)
    CONCESSION = "逆接"  # concession
    CONCESSION_FORWARD = "逆接・譲歩(順方向)"  # concession (forward)
    CONCESSION_BACKWARD = "逆接・譲歩(逆方向)"  # concession (backward)
    CONCESSIVE_CONDITION = "条件-逆条件"  # concessive condition
    EVIDENCE = "根拠"  # evidence
    EVIDENCE_FORWARD = "その他根拠(順方向)"  # other evidence (forward)
    EVIDENCE_BACKWARD = "その他根拠(逆方向)"  # other evidence (backward)

    @classmethod
    def has_value(cls, value: str) -> bool:
        """Return True if some member of this enumeration has the given value.

        Args:
            value: Discourse relation tag string.
        """
        for member in cls:
            if value == member.value:
                return True
        return False

    @property
    def label(self) -> DiscourseRelationLabel:
        """Return the discourse relation label that corresponds to this tag."""
        tags = DiscourseRelationTag
        # Each entry pairs a label with every tag that collapses onto it.
        label_groups = (
            (
                DiscourseRelationLabel.NO_RELATION,
                (tags.NO_RELATION,),
            ),
            (
                DiscourseRelationLabel.CAUSE_REASON,
                (
                    tags.CAUSE_REASON,
                    tags.CAUSE_REASON_FORWARD,
                    tags.CAUSE_REASON_BACKWARD,
                    tags.CAUSE_REASON_BACKWARD2,
                ),
            ),
            (
                DiscourseRelationLabel.PURPOSE,
                (
                    tags.PURPOSE,
                    tags.PURPOSE_FORWARD,
                    tags.PURPOSE_BACKWARD,
                ),
            ),
            (
                DiscourseRelationLabel.CONDITION,
                (
                    tags.CONDITION,
                    tags.CONDITION_FORWARD,
                    tags.CONDITION_BACKWARD,
                    tags.NEGATIVE_CONDITION,
                ),
            ),
            (
                DiscourseRelationLabel.CONTRAST,
                (
                    tags.CONTRAST,
                    tags.CONTRAST_NO_DIRECTION,
                ),
            ),
            (
                DiscourseRelationLabel.CONCESSION,
                (
                    tags.CONCESSION,
                    tags.CONCESSION_FORWARD,
                    tags.CONCESSION_BACKWARD,
                    tags.CONCESSIVE_CONDITION,
                ),
            ),
            (
                DiscourseRelationLabel.EVIDENCE,
                (
                    tags.EVIDENCE,
                    tags.EVIDENCE_FORWARD,
                    tags.EVIDENCE_BACKWARD,
                ),
            ),
        )
        for group_label, group_tags in label_groups:
            if self in group_tags:
                return group_label
        raise AssertionError  # unreachable

    def is_swap_needed(self) -> bool:
        """Return True if this tag denotes a backward discourse relation.

        Callers in this module use this to decide whether the modifier and
        head clauses have to be exchanged.
        """
        backward_tags = (
            DiscourseRelationTag.CAUSE_REASON_BACKWARD,
            DiscourseRelationTag.CAUSE_REASON_BACKWARD2,
            DiscourseRelationTag.PURPOSE_BACKWARD,
            DiscourseRelationTag.CONDITION_BACKWARD,
            DiscourseRelationTag.CONCESSION_BACKWARD,
            DiscourseRelationTag.EVIDENCE_BACKWARD,
        )
        return self in backward_tags
@dataclass
class DiscourseRelation:
    """A discourse relation between a modifier clause and a head clause.

    Equality and hashing only take ``label``, ``modifier`` and ``head`` into
    account; ``sid``, ``base_phrase_index``, ``tag`` and ``is_explicit`` are
    ignored.
    """

    # Feature-string patterns. The first two capture the tag text of a
    # (backward) clause function; the third captures "<sid>/<index>/<tag>".
    CLAUSE_FUNCTION_PAT: ClassVar[re.Pattern] = re.compile(r"節-機能-(?P<label>.+)")
    BACKWARD_CLAUSE_FUNCTION_PAT: ClassVar[re.Pattern] = re.compile(r"節-前向き機能-(?P<label>.+)")
    DISCOURSE_RELATION_PAT: ClassVar[re.Pattern] = re.compile(
        r"(?P<sid>[^/]+)/(?P<base_phrase_index>\d+)/(?P<tag>[^/]+)"
    )

    sid: str  #: Sentence ID of the head.
    base_phrase_index: int  #: Base phrase index of the head.
    label: DiscourseRelationLabel  #: Discourse relation label.
    tag: DiscourseRelationTag  #: Discourse relation tag.
    modifier: "Clause"  #: Modifier clause.
    head: "Clause"  #: Head clause.
    is_explicit: bool = False  #: True if the discourse relation is explicit.

    def __hash__(self) -> int:
        # Must stay in sync with __eq__: same three fields.
        return hash((self.label, self.modifier, self.head))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, type(self)):
            return False
        return self.label == other.label and self.modifier == other.modifier and self.head == other.head

    @classmethod
    def from_clause_function_fstring(cls, fstring: str, modifier: "Clause") -> Optional["DiscourseRelation"]:
        """Build a discourse relation from a clause-function feature string.

        Args:
            fstring: Feature string representing a clause function.
            modifier: Modifier clause.

        Returns:
            The discourse relation, or None if ``fstring`` does not match the
            clause-function pattern, its tag is unknown, or ``modifier`` has
            no parent clause.

        .. note::
            Discourse relations derived from clause functions are regarded as
            explicit.
        """
        match = cls.CLAUSE_FUNCTION_PAT.match(fstring)
        if match is None:
            return None
        tag_value = match["label"]
        if not DiscourseRelationTag.has_value(tag_value):
            return None
        tag = DiscourseRelationTag(tag_value)
        label = tag.label
        head = modifier.parent
        if head is None:
            return None
        if tag.is_swap_needed():
            # NOTE: Currently, no clause function requires swap.
            modifier, head = head, modifier  # pragma: no cover
        return cls(
            sid=modifier.sentence.sid,
            base_phrase_index=head.end.index,
            label=label,
            tag=tag,
            modifier=modifier,
            head=head,
            is_explicit=True,
        )

    @classmethod
    def from_backward_clause_function_fstring(cls, fstring: str, head: "Clause") -> Optional["DiscourseRelation"]:
        """Build a discourse relation from a backward clause-function feature string.

        The modifier is taken to be the last clause of the sentence that
        precedes the head's sentence in the same document.

        Args:
            fstring: Feature string representing a backward clause function.
            head: Head clause.

        Returns:
            The discourse relation, or None if ``fstring`` does not match the
            backward clause-function pattern, its tag is unknown, or no
            preceding sentence is available (the sentence has no document or
            is the first one).

        .. note::
            Discourse relations derived from backward clause functions are
            regarded as explicit.
        """
        match = cls.BACKWARD_CLAUSE_FUNCTION_PAT.match(fstring)
        if match is None:
            return None
        tag_value = match["label"]
        if not DiscourseRelationTag.has_value(tag_value):
            return None
        tag = DiscourseRelationTag(tag_value)
        label = tag.label
        if not head.sentence.has_document():
            return None  # cannot find modifier
        if head.sentence.index == 0:
            return None  # cannot find modifier
        modifier = head.sentence.document.sentences[head.sentence.index - 1].clauses[-1]
        if tag.is_swap_needed():
            modifier, head = head, modifier
        return cls(
            sid=head.sentence.sid,
            base_phrase_index=head.end.index,
            label=label,
            tag=tag,
            modifier=modifier,
            head=head,
            is_explicit=True,
        )

    @classmethod
    def from_discourse_relation_fstring(cls, fstring: str, modifier: "Clause") -> Optional["DiscourseRelation"]:
        """Build a discourse relation from a discourse-relation feature string.

        The feature string has the form ``<sid>/<base phrase index>/<tag>`` and
        identifies the head clause by the base phrase that ends it. The
        resulting relation keeps the default ``is_explicit=False``.

        Args:
            fstring: Feature string representing a discourse relation.
            modifier: Modifier clause.

        Returns:
            The discourse relation, or None (after logging a warning) if the
            string is malformed, the tag is unknown, the sentence ID cannot be
            found, the index is out of range, or the indexed base phrase is
            not the last base phrase of its clause.
        """
        match = cls.DISCOURSE_RELATION_PAT.match(fstring)
        if match is None:
            logger.warning(f"'{fstring}' is not a valid discourse relation fstring")
            return None
        sid = match["sid"]
        base_phrase_index = int(match["base_phrase_index"])
        tag_value = match["tag"]
        if not DiscourseRelationTag.has_value(tag_value):
            logger.warning(f"unknown discourse relation label '{tag_value}' found")
            return None
        tag = DiscourseRelationTag(tag_value)
        label = tag.label

        # Search the whole document when there is one; otherwise only the
        # modifier's own sentence can be referenced.
        if modifier.sentence.has_document():
            sentences = modifier.document.sentences
        else:
            sentences = [modifier.sentence]
        head_sentence: Optional["Sentence"] = next(
            (sentence for sentence in sentences if sentence.sid == sid),
            None,
        )
        if head_sentence is None:
            logger.warning(f"{sid} not found")
            return None
        if base_phrase_index >= len(head_sentence.base_phrases):
            logger.warning(f"index out of range in {sid}")
            return None
        head_base_phrase = head_sentence.base_phrases[base_phrase_index]
        head = head_base_phrase.clause
        # The referenced base phrase must be the one that closes its clause.
        if head.end != head_base_phrase:
            logger.warning(f"invalid clause tag in {sid}")
            return None
        if tag.is_swap_needed():
            # NOTE(review): sid and base_phrase_index come from the feature
            # string and are not updated by this swap - confirm this is intended.
            modifier, head = head, modifier
        return cls(
            sid=sid,
            base_phrase_index=base_phrase_index,
            label=label,
            tag=tag,
            modifier=modifier,
            head=head,
        )

    def to_fstring(self) -> str:
        """Convert the discourse relation into a feature string.

        The string is built from ``sid``, ``base_phrase_index`` and the value
        of ``label`` (not ``tag``).
        """
        return f"<談話関係:{self.sid}/{self.base_phrase_index}/{self.label.value}>"