Source code for katz.prior

import numpy as np
from katz.back_off import BackOff
from katz.process_equations import standardise_file, SymbolCoder


[docs]
class KatzPrior:

    def __init__(self, n, basis_functions, in_eqfile, out_eqfile, input_delimiter=','):
        """Build the two back-off models used to score equations with an n-gram Katz prior

        The equations are standardised via ``standardise_file``, every equation is
        converted to coded tuples by a ``SymbolCoder``, and the tuples are split
        into a "left" and a "right" training set, each of which is handed to its
        own ``BackOff`` model.

        Args:
            :n (int): The length of the n-tuples to consider (tuples are requested from the coder with ``n + 1``)
            :basis_functions (list): List of basis functions to consider. Entries 0, 1 and 2 are lists of nullary, unary, and binary operators, respectively.
            :in_eqfile (str): Name of file containing the equations to study. If None, then equations read from out_eqfile
            :out_eqfile (str): Name of file to output the standardised equations to
            :input_delimiter (str): The delimiter used in the input csv file

        Returns:
            KatzPrior: Prior model to find prior of a function given a previous set of equations

        """

        self.n = n
        self.all_eq, self.maxvar = standardise_file(in_eqfile, out_eqfile, input_delimiter)

        # Nullary symbols: the user-supplied ones plus the parameter "a" and one
        # variable name x0, x1, ... per variable reported by standardise_file.
        # Passing through a set removes duplicates.
        nullary = basis_functions[0] + ["a"] + [f"x{i}" for i in range(self.maxvar)]
        self.basis_functions = [
            list(set(nullary)),   # type0
            basis_functions[1],   # type1
            basis_functions[2],   # type2
        ]
        self.coder = SymbolCoder(self.basis_functions)

        left_samples, right_samples = [], []
        for equation in self.all_eq:
            coded = self.coder.process_all_equations(n + 1, [equation], self.maxvar)
            # "Left" data: the first tuple in full, the others without their
            # final entry.
            left_samples.append(coded[0])
            left_samples.extend(entry[:-1] for entry in coded[1:])
            # "Right" data: every tuple after the first whose final entry is
            # not the coder's 'None' symbol.
            right_samples.extend(
                entry for entry in coded[1:] if entry[-1] != self.coder.code['None']
            )

        self.backoff_left = BackOff(left_samples)
        self.backoff_right = BackOff(right_samples)
        

[docs]
    def logprior(self, eq):
        """
        Compute the natural logarithm of the prior of a given equation
        
        Args:
            :eq (str): The equation to find the prior probability of
            
        Returns:
            :p (float): The natural logarithm of the prior of the supplied equation
        """

        t = self.coder.process_all_equations(self.n+1, [eq], self.maxvar)

        tleft = [t[0]] + [tt[:-1] for tt in t[1:]]
        pleft = np.array([self.backoff_left.get_pbo(tt[-1], tt[:-1]) for tt in tleft])
        
        tright = [tt for tt in t[1:] if tt[-1] != self.coder.code['None']]
        pright = np.array([self.backoff_right.get_pbo(tt[-1], tt[-(self.n+1):-1]) for tt in tright])
        
        p = np.sum(np.log(pleft)) + np.sum(np.log(pright))
        
        return p