Top

ccg_nlpy.core.view module

import json
import logging

# Module-level logger named after this module; used by View for its warnings
logger = logging.getLogger(__name__)

class View(object):
    """
    Wrapper around one decoded JSON view of an annotated text.

    A view holds an optional list of constituents (dictionaries carrying at
    least 'start' and 'end' token offsets) and an optional list of relations
    between constituents. Instances support len(), iteration and indexing
    over the constituents; a view without constituents behaves as empty.
    """

    # Keys that get_cons() can extract; "position" is derived from 'start'/'end'
    _CONSTITUENT_KEYS = ("score", "label", "position", "tokens")

    def __init__(self, view, tokens):
        """
        Constructor for the view

        The constituent dictionaries found in `view` are stored by reference
        and modified in place: a 'tokens' entry is added to each of them, a
        'label' entry is set for the TOKENS view, and relation index lists
        are attached when the view has relations.

        @param: view, the decoded JSON object containing information of the view
                tokens, List of tokens in the view
        """
        self.view_name = view["viewName"]
        self.tokens = tokens

        # Only the first entry of "viewData" is consulted
        view_data = view["viewData"][0]

        # get view_type: TreeView, PredicateArgument, TokenLabelView, ...
        # (the last dot-separated component of the full type name)
        self.view_type = view_data["viewType"].split(".")[-1]

        self.cons_list = None
        self.relation_array = None

        if "constituents" in view_data:
            self.cons_list = []
            for constituent in view_data["constituents"]:
                cons_tokens = self._span_text(constituent['start'],
                                              constituent['end'])
                # Labels of TOKENS view will not be recorded when serializing
                # text annotation in JSON format in pipeline, so the surface
                # text is used as the label for that view
                if self.view_name == 'TOKENS':
                    constituent['label'] = cons_tokens
                constituent['tokens'] = cons_tokens
                self.cons_list.append(constituent)

        if "relations" in view_data:
            self.relation_array = list(view_data["relations"])
            self._link_constituents()

    def _span_text(self, start, end):
        """
        Helper function to build the space-separated text of a token span

        @param: start, index of the first token (always included)
                end, index one past the last token of the span
        @return: the tokens of the span joined by single spaces
        """
        # The token at `start` is included even when end <= start
        token_indices = [start] + list(range(start + 1, end))
        return ' '.join(self.tokens[index] for index in token_indices)

    def _link_constituents(self):
        """
        Helper function to build connection between constituents based on relation
        This function will be called only when relations exist

        Each relation's index in relation_array is appended to the
        'outgoing_relations' list of its source constituent and to the
        'incoming_relations' list of its target constituent.
        """
        for relation_index, relation in enumerate(self.relation_array):
            src = self.cons_list[relation['srcConstituent']]
            target = self.cons_list[relation['targetConstituent']]
            src.setdefault('outgoing_relations', []).append(relation_index)
            target.setdefault('incoming_relations', []).append(relation_index)

    def __str__(self):
        """
        Special method to print the view in (label, tokens) format
        """
        if self.cons_list is None:
            constituent_label_string = "this view does not have constituents in your input text. "
        else:
            constituent_label_string = "".join(
                "(" + cons['label'] + " " + cons['tokens'] + ") "
                for cons in self.cons_list)
        return self.view_name + " view: " + constituent_label_string

    # The three following functions are used to make View class be able to iterate and index
    def __iter__(self):
        """
        Iterate over the constituents; yields nothing if the view has none
        """
        return iter(self.cons_list or [])

    def __getitem__(self, index):
        """
        Return the constituent(s) at `index`

        @raise IndexError if the view has no constituents or index is out of range
        """
        if self.cons_list is None:
            raise IndexError("View has no constituents")
        return self.cons_list[index]

    def __len__(self):
        """
        Number of constituents in the view (0 if the view has none)
        """
        return 0 if self.cons_list is None else len(self.cons_list)

    def get_view_type(self):
        """
        Function to get type of the view

        @return view type of the view
        """
        return self.view_type

    def get_cons(self, position=None, key=None):
        """
        Function to get a list of constituents in the view

        @param: position, the index of the specific constituent that user wants;
                          a position that is None or out of the range
                          [0, number of constituents) selects all constituents
                key, the specific key in constituents that user wants ("score", "label", "position","tokens")
        @return: if key is not given, a list of all constituents if position is not given,
                 or a list contains the constituent at specified position if position is given
                 otherwise, a list of specific key in respect to constituents;
                 None (after logging a warning) if the view has no constituents
                 or the key is not one of the supported keys
        """
        if self.cons_list is None:
            logger.warning("This view does not have constituents in your input text")
            return None

        if key is not None and key not in self._CONSTITUENT_KEYS:
            logger.warning("Invalid key in constituent")
            return None

        if position is not None and 0 <= position < len(self.cons_list):
            selected = [self.cons_list[position]]
        else:
            selected = self.cons_list

        if key is None:
            return selected
        if key == "position":
            # "position" is not a stored key; it is the (start, end) pair
            return [(cons["start"], cons["end"]) for cons in selected]
        return [cons[key] for cons in selected]

    def get_con_score(self, position=None):
        """
        Wrapper function to get a list of scores of constituents in the view

        @param: position, the index of the specific constituent that user wants score from
        @return: list of scores of all constituents if position is not given,
                 otherwise return a list contains the score of the constituent at specified position
        """
        return self.get_cons(position, "score")

    def get_con_label(self, position=None):
        """
        Wrapper function to get a list of labels of constituents in the view

        @param: position, the index of the specific constituent that user wants label from
        @return: list of labels of all constituents if position is not given,
                 otherwise return a list contains the label of the constituent at specified position
        """
        return self.get_cons(position, "label")

    def get_con_position(self, position=None):
        """
        Wrapper function to get a list of positions of constituents in the view in respect to tokens of the text

        @param: position, the index of the specific constituent that user wants token position from
        @return: list of position tuples (start_pos, end_pos) of all constituents if position is not given,
                 otherwise return a list contains the token position of the constituent at specified position
        """
        return self.get_cons(position, "position")

    def get_relations(self, position=None):
        """
        Function to get the relation array if the view supports relations

        @param: position, the index of the specific relation that user wants
        @return: list of relations if position is not given (or out of range),
                 otherwise return a list contains the relation at specified position;
                 None (after logging a warning) if the view has no relations
        """
        if self.relation_array is None:
            logger.warning("This view does not support relations")
            return None
        if position is not None and 0 <= position < len(self.relation_array):
            return [self.relation_array[position]]
        return self.relation_array

    def get_overlapping_constituents(self, start_token_index, end_token_index):
        """
        Function to get a list of constituents in the view that overlap with the indices provided

        Both ends of the query range and both ends of each constituent are
        compared inclusively.

        @param: start_token_index, the starting index of the range for overlapping
                end_token_index, the ending index of the range for overlapping
        @return: List of overlapping constituents if the indices are valid
                 (empty if the view has no constituents), None otherwise
        """
        if start_token_index > end_token_index:
            logger.warning("Invalid token indices given, please provide proper indices.")
            return None
        if self.cons_list is None:
            return []
        # A constituent overlaps if it contains either end of the query range
        # or lies entirely inside it. The case "constituent contains the whole
        # range" is already covered by the first test because start <= end.
        return [
            cons for cons in self.cons_list
            if ((cons['start'] <= start_token_index <= cons['end']) or
                (cons['start'] <= end_token_index <= cons['end']) or
                (cons['start'] >= start_token_index and cons['end'] <= end_token_index))
        ]

Module variables

var logger

Classes

class View

class View(object):
    """
    Wrapper around one decoded JSON view of an annotated text.

    A view holds an optional list of constituents (dictionaries carrying at
    least 'start' and 'end' token offsets) and an optional list of relations
    between constituents. Instances support len(), iteration and indexing
    over the constituents; a view without constituents behaves as empty.
    """

    # Keys that get_cons() can extract; "position" is derived from 'start'/'end'
    _CONSTITUENT_KEYS = ("score", "label", "position", "tokens")

    def __init__(self, view, tokens):
        """
        Constructor for the view

        The constituent dictionaries found in `view` are stored by reference
        and modified in place: a 'tokens' entry is added to each of them, a
        'label' entry is set for the TOKENS view, and relation index lists
        are attached when the view has relations.

        @param: view, the decoded JSON object containing information of the view
                tokens, List of tokens in the view
        """
        self.view_name = view["viewName"]
        self.tokens = tokens

        # Only the first entry of "viewData" is consulted
        view_data = view["viewData"][0]

        # get view_type: TreeView, PredicateArgument, TokenLabelView, ...
        # (the last dot-separated component of the full type name)
        self.view_type = view_data["viewType"].split(".")[-1]

        self.cons_list = None
        self.relation_array = None

        if "constituents" in view_data:
            self.cons_list = []
            for constituent in view_data["constituents"]:
                cons_tokens = self._span_text(constituent['start'],
                                              constituent['end'])
                # Labels of TOKENS view will not be recorded when serializing
                # text annotation in JSON format in pipeline, so the surface
                # text is used as the label for that view
                if self.view_name == 'TOKENS':
                    constituent['label'] = cons_tokens
                constituent['tokens'] = cons_tokens
                self.cons_list.append(constituent)

        if "relations" in view_data:
            self.relation_array = list(view_data["relations"])
            self._link_constituents()

    def _span_text(self, start, end):
        """
        Helper function to build the space-separated text of a token span

        @param: start, index of the first token (always included)
                end, index one past the last token of the span
        @return: the tokens of the span joined by single spaces
        """
        # The token at `start` is included even when end <= start
        token_indices = [start] + list(range(start + 1, end))
        return ' '.join(self.tokens[index] for index in token_indices)

    def _link_constituents(self):
        """
        Helper function to build connection between constituents based on relation
        This function will be called only when relations exist

        Each relation's index in relation_array is appended to the
        'outgoing_relations' list of its source constituent and to the
        'incoming_relations' list of its target constituent.
        """
        for relation_index, relation in enumerate(self.relation_array):
            src = self.cons_list[relation['srcConstituent']]
            target = self.cons_list[relation['targetConstituent']]
            src.setdefault('outgoing_relations', []).append(relation_index)
            target.setdefault('incoming_relations', []).append(relation_index)

    def __str__(self):
        """
        Special method to print the view in (label, tokens) format
        """
        if self.cons_list is None:
            constituent_label_string = "this view does not have constituents in your input text. "
        else:
            constituent_label_string = "".join(
                "(" + cons['label'] + " " + cons['tokens'] + ") "
                for cons in self.cons_list)
        return self.view_name + " view: " + constituent_label_string

    # The three following functions are used to make View class be able to iterate and index
    def __iter__(self):
        """
        Iterate over the constituents; yields nothing if the view has none
        """
        return iter(self.cons_list or [])

    def __getitem__(self, index):
        """
        Return the constituent(s) at `index`

        @raise IndexError if the view has no constituents or index is out of range
        """
        if self.cons_list is None:
            raise IndexError("View has no constituents")
        return self.cons_list[index]

    def __len__(self):
        """
        Number of constituents in the view (0 if the view has none)
        """
        return 0 if self.cons_list is None else len(self.cons_list)

    def get_view_type(self):
        """
        Function to get type of the view

        @return view type of the view
        """
        return self.view_type

    def get_cons(self, position=None, key=None):
        """
        Function to get a list of constituents in the view

        @param: position, the index of the specific constituent that user wants;
                          a position that is None or out of the range
                          [0, number of constituents) selects all constituents
                key, the specific key in constituents that user wants ("score", "label", "position","tokens")
        @return: if key is not given, a list of all constituents if position is not given,
                 or a list contains the constituent at specified position if position is given
                 otherwise, a list of specific key in respect to constituents;
                 None (after logging a warning) if the view has no constituents
                 or the key is not one of the supported keys
        """
        if self.cons_list is None:
            logger.warning("This view does not have constituents in your input text")
            return None

        if key is not None and key not in self._CONSTITUENT_KEYS:
            logger.warning("Invalid key in constituent")
            return None

        if position is not None and 0 <= position < len(self.cons_list):
            selected = [self.cons_list[position]]
        else:
            selected = self.cons_list

        if key is None:
            return selected
        if key == "position":
            # "position" is not a stored key; it is the (start, end) pair
            return [(cons["start"], cons["end"]) for cons in selected]
        return [cons[key] for cons in selected]

    def get_con_score(self, position=None):
        """
        Wrapper function to get a list of scores of constituents in the view

        @param: position, the index of the specific constituent that user wants score from
        @return: list of scores of all constituents if position is not given,
                 otherwise return a list contains the score of the constituent at specified position
        """
        return self.get_cons(position, "score")

    def get_con_label(self, position=None):
        """
        Wrapper function to get a list of labels of constituents in the view

        @param: position, the index of the specific constituent that user wants label from
        @return: list of labels of all constituents if position is not given,
                 otherwise return a list contains the label of the constituent at specified position
        """
        return self.get_cons(position, "label")

    def get_con_position(self, position=None):
        """
        Wrapper function to get a list of positions of constituents in the view in respect to tokens of the text

        @param: position, the index of the specific constituent that user wants token position from
        @return: list of position tuples (start_pos, end_pos) of all constituents if position is not given,
                 otherwise return a list contains the token position of the constituent at specified position
        """
        return self.get_cons(position, "position")

    def get_relations(self, position=None):
        """
        Function to get the relation array if the view supports relations

        @param: position, the index of the specific relation that user wants
        @return: list of relations if position is not given (or out of range),
                 otherwise return a list contains the relation at specified position;
                 None (after logging a warning) if the view has no relations
        """
        if self.relation_array is None:
            logger.warning("This view does not support relations")
            return None
        if position is not None and 0 <= position < len(self.relation_array):
            return [self.relation_array[position]]
        return self.relation_array

    def get_overlapping_constituents(self, start_token_index, end_token_index):
        """
        Function to get a list of constituents in the view that overlap with the indices provided

        Both ends of the query range and both ends of each constituent are
        compared inclusively.

        @param: start_token_index, the starting index of the range for overlapping
                end_token_index, the ending index of the range for overlapping
        @return: List of overlapping constituents if the indices are valid
                 (empty if the view has no constituents), None otherwise
        """
        if start_token_index > end_token_index:
            logger.warning("Invalid token indices given, please provide proper indices.")
            return None
        if self.cons_list is None:
            return []
        # A constituent overlaps if it contains either end of the query range
        # or lies entirely inside it. The case "constituent contains the whole
        # range" is already covered by the first test because start <= end.
        return [
            cons for cons in self.cons_list
            if ((cons['start'] <= start_token_index <= cons['end']) or
                (cons['start'] <= end_token_index <= cons['end']) or
                (cons['start'] >= start_token_index and cons['end'] <= end_token_index))
        ]

Ancestors (in MRO)

  • View
  • builtins.object

Methods

def __init__(

self, view, tokens)

Constructor for the view

@param: view, the decoded JSON object containing information of the view; tokens, the list of tokens in the view

def __init__(self, view, tokens):
    """
    Constructor for the view

    The constituent dictionaries found in `view` are stored by reference and
    modified in place: a 'tokens' entry is added to each of them, and a
    'label' entry is set for the TOKENS view.

    @param: view, the decoded JSON object containing information of the view
            tokens, List of tokens in the view
    """
    self.view_name = view["viewName"]
    self.tokens = tokens
    # Only the first entry of "viewData" is consulted
    view_data = view["viewData"][0]
    # get view_type: TreeView, PredicateArgument, TokenLabelView, ...
    # (the last dot-separated component of the full type name)
    self.view_type = view_data["viewType"].split(".")[-1]
    self.cons_list = None
    self.relation_array = None
    if "constituents" in view_data:
        self.cons_list = []
        for constituent in view_data["constituents"]:
            start, end = constituent['start'], constituent['end']
            # The token at `start` is always included, followed by the
            # tokens up to (but not including) `end`
            token_indices = [start] + list(range(start + 1, end))
            cons_tokens = ' '.join(self.tokens[index] for index in token_indices)
            # Labels of TOKENS view will not be recorded when serializing text annotation in JSON format in pipeline
            # So the surface text is used as the label for that view
            if self.view_name == 'TOKENS':
                constituent['label'] = cons_tokens
            constituent['tokens'] = cons_tokens
            self.cons_list.append(constituent)
    if "relations" in view_data:
        self.relation_array = list(view_data["relations"])
        # Attach relation indices to their source/target constituents
        self._link_constituents()

def get_con_label(

self, position=None)

Wrapper function to get a list of labels of constituents in the view

@param: position, the index of the specific constituent that user wants label from @return: list of labels of all constituents if position is not given, otherwise return a list contains the label of the constituent at specified position

def get_con_label(self, position=None):
    """
    Convenience accessor for constituent labels; delegates to get_cons with the "label" key

    @param: position, optional index of the single constituent whose label is wanted
    @return: whatever get_cons returns for the "label" key: the labels of all
             constituents when position is not given, otherwise a one-element
             list holding the label of the constituent at that position
    """
    return self.get_cons(position=position, key="label")

def get_con_position(

self, position=None)

Wrapper function to get a list of positions of constituents in the view in respect to tokens of the text

@param: position, the index of the specific constituent that user wants token position from @return: list of position tuples (start_pos, end_pos) of all constituents if position is not given, otherwise return a list contains the token position of the constituent at specified position

def get_con_position(self, position=None):
    """
    Convenience accessor for constituent token spans; delegates to get_cons with the "position" key

    @param: position, optional index of the single constituent whose token span is wanted
    @return: whatever get_cons returns for the "position" key: (start_pos, end_pos)
             tuples of all constituents when position is not given, otherwise a
             one-element list holding the tuple of the constituent at that position
    """
    return self.get_cons(position=position, key="position")

def get_con_score(

self, position=None)

Wrapper function to get a list of scores of constituents in the view

@param: position, the index of the specific constituent that user wants score from @return: list of scores of all constituents if position is not given, otherwise return a list contains the score of the constituent at specified position

def get_con_score(self, position=None):
    """
    Convenience accessor for constituent scores; delegates to get_cons with the "score" key

    @param: position, optional index of the single constituent whose score is wanted
    @return: whatever get_cons returns for the "score" key: the scores of all
             constituents when position is not given, otherwise a one-element
             list holding the score of the constituent at that position
    """
    return self.get_cons(position=position, key="score")

def get_cons(

self, position=None, key=None)

Function to get a list of constituents in the view

@param: position, the index of the specific constituent that the user wants; key, the specific key in constituents that the user wants ("score", "label", "position", "tokens"). @return: if key is not given, a list of all constituents when position is not given, or a list containing the constituent at the specified position when position is given; otherwise, a list of the values of the specified key for those constituents

def get_cons(self, position=None, key=None):
    """
    Function to get a list of constituents in the view

    @param: position, the index of the specific constituent that user wants;
                      a position that is None or out of the range
                      [0, number of constituents) selects all constituents
            key, the specific key in constituents that user wants ("score", "label", "position","tokens")
    @return: if key is not given, a list of all constituents if position is not given,
             or a list contains the constituent at specified position if position is given
             otherwise, a list of specific key in respect to constituents;
             None (after logging a warning) if the view has no constituents
             or the key is not one of the supported keys
    """
    if self.cons_list is None:
        logger.warning("This view does not have constituents in your input text")
        return None
    if key is not None and key not in ("score", "label", "position", "tokens"):
        logger.warning("Invalid key in constituent")
        return None
    # Narrow to a single constituent only for a valid, in-range position
    if position is not None and 0 <= position < len(self.cons_list):
        selected = [self.cons_list[position]]
    else:
        selected = self.cons_list
    if key is None:
        return selected
    if key == "position":
        # "position" is not a stored key; it is the (start, end) pair
        return [(cons["start"], cons["end"]) for cons in selected]
    return [cons[key] for cons in selected]

def get_overlapping_constituents(

self, start_token_index, end_token_index)

Function to get a list of constituents in the view that overlap with the indices provided

@param: start_token_index, the starting index of the range for overlapping; end_token_index, the ending index of the range for overlapping. @return: list of overlapping constituents if the indices are valid, None otherwise

def get_overlapping_constituents(self, start_token_index, end_token_index):
    """
    Function to get a list of constituents in the view that overlap with the indices provided

    Both ends of the query range and both ends of each constituent are
    compared inclusively.

    @param: start_token_index, the starting index of the range for overlapping
            end_token_index, the ending index of the range for overlapping
    @return: List of overlapping constituents if the indices are valid
             (empty if the view has no constituents), None otherwise
    """
    if start_token_index > end_token_index:
        logger.warning("Invalid token indices given, please provide proper indices.")
        return None
    # A view without constituents has nothing that can overlap
    if self.cons_list is None:
        return []
    # A constituent overlaps if it contains either end of the query range or
    # lies entirely inside it. The case "constituent contains the whole range"
    # is already covered by the first test because start <= end here.
    return [
        cons for cons in self.cons_list
        if ((cons['start'] <= start_token_index <= cons['end']) or
            (cons['start'] <= end_token_index <= cons['end']) or
            (cons['start'] >= start_token_index and cons['end'] <= end_token_index))
    ]

def get_relations(

self, position=None)

Function to get the relation array if the view supports relations

@param: position, the index of the specific relation that the user wants. @return: list of relations if position is not given, otherwise a list containing the relation at the specified position

def get_relations(self, position=None):
    """
    Function to get the relation array if the view supports relations

    @param: position, the index of the specific relation that user wants
    @return: list of relations if position is not given (or out of range),
             otherwise return a list contains the relation at specified position;
             None (after logging a warning) if the view has no relations
    """
    if self.relation_array is None:
        logger.warning("This view does not support relations")
        return None
    # Narrow to a single relation only for a valid, in-range position
    if position is not None and 0 <= position < len(self.relation_array):
        return [self.relation_array[position]]
    return self.relation_array

def get_view_type(

self)

Function to get type of the view

@return view type of the view

def get_view_type(self):
    """
    Accessor for the view_type attribute of this view

    @return the value stored in self.view_type
    """
    view_type = self.view_type
    return view_type

Instance variables

var cons_list

var relation_array

var tokens

var view_name

var view_type