Coding standards
Security in ResourceSpace
Developer reference
Database
Action functions
Admin functions
Ajax functions
Annotation functions
API functions
Collections functions
Comment functions
Config functions
CSV export functions
Dash functions
Debug functions
Encryption functions
Facial recognition functions
File functions
General functions
Language functions
Log functions
Login functions
Message functions
Migration functions
Node functions
PDF functions
Plugin functions
Render functions
Reporting functions
Request functions
Research functions
Slideshow functions
Theme permission functions
User functions
Video functions
Database functions
Metadata functions
Resource functions
Search functions
Map functions
Job functions
Tab functions
Test functions

split_keywords()

Parameters

Column | Type | Default | Description
$search
$index false
$partial_index false
$is_date false
$is_html false
$keepquotes false

Location

include/search_functions.php lines 2141 to 2262

Definition

 
/**
 * Takes $search and returns an array of individual keywords.
 *
 * Three modes are handled:
 *  - Date indexing ($index && $is_date): a "Y-m-d" style value is expanded into
 *    year, year-month and the full value so each level of date can be matched.
 *  - 'Constructed' search queries (not indexing and the text contains ":"):
 *    split on quotes, commas or spaces depending on $keepquotes and on whether
 *    the text mentions startdate/enddate.
 *  - Everything else: cleansed with cleanse_string() and split on spaces, or,
 *    when searching with $keepquotes and a double quote is present, split so
 *    that quoted phrases stay together.
 *
 * @param  string|array  $search         Text to split. An array is only handled in date-index mode, where it is returned unchanged.
 * @param  bool          $index          True when the keywords are being produced for indexing rather than for a search.
 * @param  bool          $partial_index  When true (and $index is true) the result is passed through add_partial_index().
 * @param  bool          $is_date        When true (and $index is true) $search is treated as a "-" separated date.
 * @param  bool          $is_html        Treat $search as HTML. HTML handling is also applied when the text starts with "<" and ends with ">".
 * @param  bool          $keepquotes     Keep double-quoted phrases together instead of breaking them into separate words.
 *
 * @return array  List of keywords.
 */
function split_keywords($search,$index=false,$partial_index=false,$is_date=false,$is_html=false,$keepquotes=false)
    {
    global $permitted_html_tags, $permitted_html_attributes;

    if ($index && $is_date)
        {
        # Date handling... index a little differently to support various levels of date matching (Year, Year+Month, Year+Month+Day).

        # An array cannot be exploded (explode() throws a TypeError on PHP 8), so it must be
        # handed back before any string handling is attempted.
        if (is_array($search))
            {
            return $search;
            }

        $s=explode("-",$search);
        if (count($s)>=3)
            {
            return array($s[0],$s[0] . "-" . $s[1],$search);
            }

        return array($search);
        }

    # Remove any real / unescaped lf/cr (replacements are applied in the order listed)
    $search=str_replace(array("\r","\n","\\r","\\n")," ",$search);

    if ($is_html || (substr($search,0,1) == "<" && substr($search,-1,1) == ">"))
        {
        // String can't be in encoded format at this point or string won't be indexed correctly.
        $search=html_entity_decode($search);
        if ($index)
            {
            // Clean up html for indexing
            // Allow indexing of anchor text
            $allowed_tags=array_merge(array("a"),$permitted_html_tags);
            $allowed_attributes=array_merge(array("href"),$permitted_html_attributes);
            $search=strip_tags_and_attributes($search,$allowed_tags,$allowed_attributes);

            // Get rid of the actual html tags and attribute ids to prevent indexing these
            foreach ($allowed_tags as $allowed_tag)
                {
                $search=str_replace(array("<" . $allowed_tag . ">","<" . $allowed_tag,"</" . $allowed_tag)," ",$search);
                }
            foreach ($allowed_attributes as $allowed_attribute)
                {
                $search=str_replace($allowed_attribute . "="," ",$search);
                }

            // Remove any left over tag parts
            $search=str_replace(array(">","<","=")," ",$search);
            }
        }

    $ns=trim_spaces($search);

    if ($index==false && strpos($ns,":")!==false) # special 'constructed' query type
        {
        if ($keepquotes)
            {
            // Match either a (optionally "-" prefixed) double-quoted phrase or a run of non-whitespace
            preg_match_all('/("|-")(?:\\\\.|[^\\\\"])*"|\S+/',$ns,$matches);
            $return=trim_array($matches[0],",");
            }
        elseif (strpos($ns,"startdate") !== false || strpos($ns,"enddate") !== false)
            {
            $return=explode(",",$ns);
            }
        else
            {
            $ns=cleanse_string($ns,false,!$index,$is_html);
            $return=explode(" ",$ns);
            }

        // If we are not breaking quotes we may end up a with commas in the array of keywords which need to be removed
        if ($keepquotes)
            {
            $return=trim_array($return,",");
            }
        return $return;
        }

    if (!$index && $keepquotes && strpos($ns,"\"")!==false)
        {
        // Match either a (optionally "-" prefixed) double-quoted phrase or a run of non-whitespace
        preg_match_all('/("|-")(?:\\\\.|[^\\\\"])*"|\S+/',$ns,$matches);

        $splits=$matches[0];
        $ns=array();
        foreach ($splits as $split)
            {
            $fully_quoted=(substr($split,0,1)=="\"" && substr($split,-1,1)=="\"");
            if (!$fully_quoted && strpos($split,",")!==false)
                {
                // Unquoted piece containing commas: each comma separated part becomes its own keyword
                $ns=array_merge($ns,explode(",",$split));
                }
            else
                {
                $ns[]=$split;
                }
            }
        }
    else
        {
        # split using spaces and similar chars (according to configured whitespace characters)
        $ns=explode(" ",cleanse_string($ns,false,!$index,$is_html));
        }

    if ($keepquotes)
        {
        $ns=trim_array($ns,",");
        }

    if ($index && $partial_index)
        {
        return add_partial_index($ns);
        }
    return $ns;
    }

This article was last updated 7th September 2024 21:35 Europe/London time based on the source file dated 17th July 2024 15:30 Europe/London time.