1
0
mirror of https://github.com/profanity-im/profanity.git synced 2024-06-30 21:55:24 +00:00
profanity/src/tools/parser.c

369 lines
10 KiB
C
Raw Normal View History

/*
* parser.c
*
* Copyright (C) 2012, 2013 James Booth <boothj5@gmail.com>
*
* This file is part of Profanity.
*
* Profanity is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Profanity is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Profanity. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include <stdlib.h>
#include <string.h>
#include <glib.h>
/*
* Take a full line of input and return an array of strings representing
* the arguments of a command.
* If the number of arguments found is less than min, or more than max
* NULL is returned.
*
* inp - The line of input
* min - The minimum allowed number of arguments
* max - The maxmimum allowed number of arguments
*
* Returns - An NULL terminated array of strings representing the aguments
* of the command, or NULL if the validation fails.
*
* E.g. the following input line:
*
* /cmd arg1 arg2
*
* Will return a pointer to the following array:
*
* { "arg1", "arg2", NULL }
*
*/
2012-11-18 00:07:00 +00:00
gchar **
parse_args(const char * const inp, int min, int max)
2012-11-18 00:07:00 +00:00
{
if (inp == NULL) {
return NULL;
}
// copy and strip input of leading/trailing whitepsace
char *copy = strdup(inp);
g_strstrip(copy);
2013-07-11 23:46:33 +00:00
int inp_size = g_utf8_strlen(copy, -1);
2012-11-18 00:07:00 +00:00
gboolean in_token = FALSE;
gboolean in_quotes = FALSE;
2012-11-18 00:07:00 +00:00
char *token_start = &copy[0];
int token_size = 0;
GSList *tokens = NULL;
// add tokens to GSList
int i;
2013-07-11 23:46:33 +00:00
for (i = 0; i < inp_size; i++) {
gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
gunichar curr_uni = g_utf8_get_char(curr_ch);
2012-11-18 00:07:00 +00:00
if (!in_token) {
2013-07-11 23:46:33 +00:00
if (curr_uni == ' ') {
2012-11-18 00:07:00 +00:00
continue;
} else {
in_token = TRUE;
2013-07-11 23:46:33 +00:00
if (curr_uni == '"') {
in_quotes = TRUE;
i++;
gchar *next_ch = g_utf8_next_char(curr_ch);
gunichar next_uni = g_utf8_get_char(next_ch);
token_start = next_ch;
token_size += g_unichar_to_utf8(next_uni, NULL);
} else {
token_start = curr_ch;
token_size += g_unichar_to_utf8(curr_uni, NULL);
}
2012-11-18 00:07:00 +00:00
}
} else {
if (in_quotes) {
2013-07-11 23:46:33 +00:00
if (curr_uni == '"') {
tokens = g_slist_append(tokens, g_strndup(token_start,
token_size));
token_size = 0;
in_token = FALSE;
in_quotes = FALSE;
} else {
2013-07-11 23:46:33 +00:00
token_size += g_unichar_to_utf8(curr_uni, NULL);
}
2012-11-18 00:07:00 +00:00
} else {
2013-07-11 23:46:33 +00:00
if (curr_uni == ' ') {
tokens = g_slist_append(tokens, g_strndup(token_start,
token_size));
token_size = 0;
in_token = FALSE;
} else {
2013-07-11 23:46:33 +00:00
token_size += g_unichar_to_utf8(curr_uni, NULL);
}
2012-11-18 00:07:00 +00:00
}
}
}
if (in_token) {
tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
}
2013-07-11 23:46:33 +00:00
int num = g_slist_length(tokens) - 1;
// if num args not valid return NULL
if ((num < min) || (num > max)) {
g_slist_free_full(tokens, free);
2012-11-26 02:20:44 +00:00
g_free(copy);
return NULL;
2012-11-18 00:38:37 +00:00
// if min allowed is 0 and 0 found, return empty char* array
} else if (min == 0 && num == 0) {
2012-11-26 02:20:44 +00:00
g_slist_free_full(tokens, free);
gchar **args = malloc((num + 1) * sizeof(*args));
2012-11-18 00:38:37 +00:00
args[0] = NULL;
2012-11-26 02:20:44 +00:00
g_free(copy);
2012-11-18 00:38:37 +00:00
return args;
// otherwise return args array
} else {
gchar **args = malloc((num + 1) * sizeof(*args));
GSList *token = tokens;
token = g_slist_next(token);
int arg_count = 0;
while (token != NULL) {
args[arg_count++] = strdup(token->data);
token = g_slist_next(token);
}
args[arg_count] = NULL;
g_slist_free_full(tokens, free);
2012-11-26 02:20:44 +00:00
g_free(copy);
return args;
}
}
/*
* Take a full line of input and return an array of strings representing
* the arguments of a command. This function handles when the last parameter
* to the command is free text e.g.
*
* /msg user@host here is a message
*
* If the number of arguments found is less than min, or more than max
* NULL is returned.
*
* inp - The line of input
* min - The minimum allowed number of arguments
* max - The maxmimum allowed number of arguments
*
* Returns - An NULL terminated array of strings representing the aguments
* of the command, or NULL if the validation fails.
*
* E.g. the following input line:
*
* /cmd arg1 arg2 some free text
*
* Will return a pointer to the following array:
*
* { "arg1", "arg2", "some free text", NULL }
*
*/
gchar **
parse_args_with_freetext(const char * const inp, int min, int max)
{
if (inp == NULL) {
return NULL;
}
// copy and strip input of leading/trailing whitepsace
char *copy = strdup(inp);
g_strstrip(copy);
int inp_size = g_utf8_strlen(copy, -1);
gboolean in_token = FALSE;
gboolean in_freetext = FALSE;
gboolean in_quotes = FALSE;
char *token_start = &copy[0];
int token_size = 0;
int num_tokens = 0;
GSList *tokens = NULL;
// add tokens to GSList
int i;
for (i = 0; i < inp_size; i++) {
gchar *curr_ch = g_utf8_offset_to_pointer(copy, i);
gunichar curr_uni = g_utf8_get_char(curr_ch);
if (!in_token) {
if (curr_uni == ' ') {
continue;
} else {
in_token = TRUE;
num_tokens++;
if (num_tokens == max + 1) {
in_freetext = TRUE;
} else if (curr_uni == '"') {
in_quotes = TRUE;
i++;
gchar *next_ch = g_utf8_next_char(curr_ch);
gunichar next_uni = g_utf8_get_char(next_ch);
token_start = next_ch;
token_size += g_unichar_to_utf8(next_uni, NULL);
}
if (curr_uni == '"') {
gchar *next_ch = g_utf8_next_char(curr_ch);
token_start = next_ch;
} else {
token_start = curr_ch;
token_size += g_unichar_to_utf8(curr_uni, NULL);
}
}
} else {
if (in_quotes) {
if (curr_uni == '"') {
tokens = g_slist_append(tokens, g_strndup(token_start,
token_size));
token_size = 0;
in_token = FALSE;
in_quotes = FALSE;
} else {
if (curr_uni != '"') {
token_size += g_unichar_to_utf8(curr_uni, NULL);
}
}
} else {
if (in_freetext) {
token_size += g_unichar_to_utf8(curr_uni, NULL);
} else if (curr_uni == ' ') {
tokens = g_slist_append(tokens, g_strndup(token_start,
token_size));
token_size = 0;
in_token = FALSE;
} else if (curr_uni != '"') {
token_size += g_unichar_to_utf8(curr_uni, NULL);
}
}
}
}
if (in_token) {
tokens = g_slist_append(tokens, g_strndup(token_start, token_size));
}
int num = g_slist_length(tokens) - 1;
2012-11-18 00:07:00 +00:00
// if num args not valid return NULL
if ((num < min) || (num > max)) {
2012-11-18 00:07:00 +00:00
g_slist_free_full(tokens, free);
free(copy);
return NULL;
2012-11-18 00:38:37 +00:00
// if min allowed is 0 and 0 found, return empty char* array
} else if (min == 0 && num == 0) {
gchar **args = malloc((num + 1) * sizeof(*args));
2012-11-18 00:38:37 +00:00
args[0] = NULL;
return args;
2012-11-18 00:07:00 +00:00
// otherwise return args array
} else {
gchar **args = malloc((num + 1) * sizeof(*args));
2012-11-18 00:07:00 +00:00
GSList *token = tokens;
token = g_slist_next(token);
int arg_count = 0;
while (token != NULL) {
args[arg_count++] = strdup(token->data);
token = g_slist_next(token);
}
args[arg_count] = NULL;
g_slist_free_full(tokens, free);
free(copy);
return args;
}
}
/*
 * Count the tokens in a string.
 *
 * The count starts at one (the first token) and is increased for every
 * space that is not between a pair of double quotes, so consecutive
 * spaces are each counted. A NULL string is treated like an empty one.
 *
 * The string is scanned byte by byte: space and double quote are ASCII
 * characters, which never occur inside a UTF-8 multi-byte sequence, so
 * no decoding is required.
 *
 * string - The NUL terminated string to examine
 *
 * Returns - The number of tokens, always at least 1.
 */
int
count_tokens(char *string)
{
    // include first token
    int num_tokens = 1;
    int in_quotes = 0;
    const char *curr;

    if (string == NULL) {
        return num_tokens;
    }

    for (curr = string; *curr != '\0'; curr++) {
        if (*curr == ' ') {
            if (!in_quotes) {
                num_tokens++;
            }
        } else if (*curr == '"') {
            in_quotes = !in_quotes;
        }
    }

    return num_tokens;
}
/*
 * Return a copy of the beginning of a string, up to the point where the
 * given token number starts.
 *
 * Tokens are counted as in the scan below: the count starts at one and is
 * increased by every space that is not between a pair of double quotes.
 * Characters are copied while the count is lower than tokens, so the
 * separating space that precedes the requested token is included.
 *
 * string - The string to copy from
 * tokens - The token number at which to stop copying
 *
 * Returns - A newly allocated string (taken from a GString); an empty
 *           string when nothing is copied.
 */
char *
get_start(char *string, int tokens)
{
    GString *result = g_string_new("");
    int length = g_utf8_strlen(string, -1);
    gboolean in_quotes = FALSE;
    gchar *curr_ch = string;
    gchar uni_char[7];      // up to 6 bytes of UTF-8 plus terminator
    // include first token
    int num_tokens = 1;
    int i;

    // nothing more is copied once the requested token has been reached,
    // so the scan can stop there
    for (i = 0; (i < length) && (num_tokens < tokens); i++) {
        gunichar curr_uni = g_utf8_get_char(curr_ch);
        int len = g_unichar_to_utf8(curr_uni, uni_char);
        uni_char[len] = '\0';
        g_string_append(result, uni_char);

        if (curr_uni == ' ') {
            if (!in_quotes) {
                num_tokens++;
            }
        } else if (curr_uni == '"') {
            in_quotes = !in_quotes;
        }

        // step forward by one character
        curr_ch = g_utf8_offset_to_pointer(curr_ch, 1);
    }

    // release the GString wrapper but hand its character data to the caller
    return g_string_free(result, FALSE);
}