mirror of
https://github.com/vim/vim.git
synced 2025-07-26 11:04:33 -04:00
Problem: The arabic shaping code is verbose. Solution: Shorten the code without changing the functionality.
720 lines
16 KiB
C
720 lines
16 KiB
C
/* vi:set ts=8 sts=4 sw=4 noet:
|
|
*
|
|
* VIM - Vi IMproved by Bram Moolenaar
|
|
*
|
|
* Do ":help uganda" in Vim to read copying and usage conditions.
|
|
* Do ":help credits" in Vim to see a list of people who contributed.
|
|
* See README.txt for an overview of the Vim source code.
|
|
*/
|
|
|
|
/*
|
|
* arabic.c: functions for Arabic language
|
|
*
|
|
* Author: Nadim Shaikli & Isam Bayazidi
|
|
*/
|
|
|
|
#include "vim.h"
|
|
|
|
#if defined(FEAT_ARABIC) || defined(PROTO)
|
|
|
|
static int A_is_a(int cur_c);
|
|
static int A_is_s(int cur_c);
|
|
static int A_is_f(int cur_c);
|
|
static int chg_c_a2s(int cur_c);
|
|
static int chg_c_a2i(int cur_c);
|
|
static int chg_c_a2m(int cur_c);
|
|
static int chg_c_a2f(int cur_c);
|
|
static int chg_c_i2m(int cur_c);
|
|
static int chg_c_f2m(int cur_c);
|
|
static int chg_c_laa2i(int hid_c);
|
|
static int chg_c_laa2f(int hid_c);
|
|
static int half_shape(int c);
|
|
static int A_firstc_laa(int c1, int c);
|
|
static int A_is_harakat(int c);
|
|
static int A_is_iso(int c);
|
|
static int A_is_formb(int c);
|
|
static int A_is_ok(int c);
|
|
static int A_is_valid(int c);
|
|
static int A_is_special(int c);
|
|
|
|
|
|
/*
|
|
* Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered)
|
|
*/
|
|
static int
|
|
A_is_a(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
case a_HAMZA:
|
|
case a_ALEF_MADDA:
|
|
case a_ALEF_HAMZA_ABOVE:
|
|
case a_WAW_HAMZA:
|
|
case a_ALEF_HAMZA_BELOW:
|
|
case a_YEH_HAMZA:
|
|
case a_ALEF:
|
|
case a_BEH:
|
|
case a_TEH_MARBUTA:
|
|
case a_TEH:
|
|
case a_THEH:
|
|
case a_JEEM:
|
|
case a_HAH:
|
|
case a_KHAH:
|
|
case a_DAL:
|
|
case a_THAL:
|
|
case a_REH:
|
|
case a_ZAIN:
|
|
case a_SEEN:
|
|
case a_SHEEN:
|
|
case a_SAD:
|
|
case a_DAD:
|
|
case a_TAH:
|
|
case a_ZAH:
|
|
case a_AIN:
|
|
case a_GHAIN:
|
|
case a_TATWEEL:
|
|
case a_FEH:
|
|
case a_QAF:
|
|
case a_KAF:
|
|
case a_LAM:
|
|
case a_MEEM:
|
|
case a_NOON:
|
|
case a_HEH:
|
|
case a_WAW:
|
|
case a_ALEF_MAKSURA:
|
|
case a_YEH:
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/*
|
|
* Returns True if c is an Isolated Form-B ARABIC letter
|
|
*/
|
|
static int
|
|
A_is_s(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
case a_s_HAMZA:
|
|
case a_s_ALEF_MADDA:
|
|
case a_s_ALEF_HAMZA_ABOVE:
|
|
case a_s_WAW_HAMZA:
|
|
case a_s_ALEF_HAMZA_BELOW:
|
|
case a_s_YEH_HAMZA:
|
|
case a_s_ALEF:
|
|
case a_s_BEH:
|
|
case a_s_TEH_MARBUTA:
|
|
case a_s_TEH:
|
|
case a_s_THEH:
|
|
case a_s_JEEM:
|
|
case a_s_HAH:
|
|
case a_s_KHAH:
|
|
case a_s_DAL:
|
|
case a_s_THAL:
|
|
case a_s_REH:
|
|
case a_s_ZAIN:
|
|
case a_s_SEEN:
|
|
case a_s_SHEEN:
|
|
case a_s_SAD:
|
|
case a_s_DAD:
|
|
case a_s_TAH:
|
|
case a_s_ZAH:
|
|
case a_s_AIN:
|
|
case a_s_GHAIN:
|
|
case a_s_FEH:
|
|
case a_s_QAF:
|
|
case a_s_KAF:
|
|
case a_s_LAM:
|
|
case a_s_MEEM:
|
|
case a_s_NOON:
|
|
case a_s_HEH:
|
|
case a_s_WAW:
|
|
case a_s_ALEF_MAKSURA:
|
|
case a_s_YEH:
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/*
|
|
* Returns True if c is a Final shape of an ARABIC letter
|
|
*/
|
|
static int
|
|
A_is_f(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
case a_f_ALEF_MADDA:
|
|
case a_f_ALEF_HAMZA_ABOVE:
|
|
case a_f_WAW_HAMZA:
|
|
case a_f_ALEF_HAMZA_BELOW:
|
|
case a_f_YEH_HAMZA:
|
|
case a_f_ALEF:
|
|
case a_f_BEH:
|
|
case a_f_TEH_MARBUTA:
|
|
case a_f_TEH:
|
|
case a_f_THEH:
|
|
case a_f_JEEM:
|
|
case a_f_HAH:
|
|
case a_f_KHAH:
|
|
case a_f_DAL:
|
|
case a_f_THAL:
|
|
case a_f_REH:
|
|
case a_f_ZAIN:
|
|
case a_f_SEEN:
|
|
case a_f_SHEEN:
|
|
case a_f_SAD:
|
|
case a_f_DAD:
|
|
case a_f_TAH:
|
|
case a_f_ZAH:
|
|
case a_f_AIN:
|
|
case a_f_GHAIN:
|
|
case a_f_FEH:
|
|
case a_f_QAF:
|
|
case a_f_KAF:
|
|
case a_f_LAM:
|
|
case a_f_MEEM:
|
|
case a_f_NOON:
|
|
case a_f_HEH:
|
|
case a_f_WAW:
|
|
case a_f_ALEF_MAKSURA:
|
|
case a_f_YEH:
|
|
case a_f_LAM_ALEF_MADDA_ABOVE:
|
|
case a_f_LAM_ALEF_HAMZA_ABOVE:
|
|
case a_f_LAM_ALEF_HAMZA_BELOW:
|
|
case a_f_LAM_ALEF:
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from ISO-8859-6/Isolated to Form-B Isolated
|
|
*/
|
|
static int
|
|
chg_c_a2s(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
case a_HAMZA: return a_s_HAMZA;
|
|
case a_ALEF_MADDA: return a_s_ALEF_MADDA;
|
|
case a_ALEF_HAMZA_ABOVE: return a_s_ALEF_HAMZA_ABOVE;
|
|
case a_WAW_HAMZA: return a_s_WAW_HAMZA;
|
|
case a_ALEF_HAMZA_BELOW: return a_s_ALEF_HAMZA_BELOW;
|
|
case a_YEH_HAMZA: return a_s_YEH_HAMZA;
|
|
case a_ALEF: return a_s_ALEF;
|
|
case a_TEH_MARBUTA: return a_s_TEH_MARBUTA;
|
|
case a_DAL: return a_s_DAL;
|
|
case a_THAL: return a_s_THAL;
|
|
case a_REH: return a_s_REH;
|
|
case a_ZAIN: return a_s_ZAIN;
|
|
case a_TATWEEL: return cur_c; /* exceptions */
|
|
case a_WAW: return a_s_WAW;
|
|
case a_ALEF_MAKSURA: return a_s_ALEF_MAKSURA;
|
|
case a_BEH: return a_s_BEH;
|
|
case a_TEH: return a_s_TEH;
|
|
case a_THEH: return a_s_THEH;
|
|
case a_JEEM: return a_s_JEEM;
|
|
case a_HAH: return a_s_HAH;
|
|
case a_KHAH: return a_s_KHAH;
|
|
case a_SEEN: return a_s_SEEN;
|
|
case a_SHEEN: return a_s_SHEEN;
|
|
case a_SAD: return a_s_SAD;
|
|
case a_DAD: return a_s_DAD;
|
|
case a_TAH: return a_s_TAH;
|
|
case a_ZAH: return a_s_ZAH;
|
|
case a_AIN: return a_s_AIN;
|
|
case a_GHAIN: return a_s_GHAIN;
|
|
case a_FEH: return a_s_FEH;
|
|
case a_QAF: return a_s_QAF;
|
|
case a_KAF: return a_s_KAF;
|
|
case a_LAM: return a_s_LAM;
|
|
case a_MEEM: return a_s_MEEM;
|
|
case a_NOON: return a_s_NOON;
|
|
case a_HEH: return a_s_HEH;
|
|
case a_YEH: return a_s_YEH;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from ISO-8859-6/Isolated to Initial
|
|
*/
|
|
static int
|
|
chg_c_a2i(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
case a_YEH_HAMZA: return a_i_YEH_HAMZA;
|
|
case a_HAMZA: /* exceptions */
|
|
return a_s_HAMZA;
|
|
case a_ALEF_MADDA: /* exceptions */
|
|
return a_s_ALEF_MADDA;
|
|
case a_ALEF_HAMZA_ABOVE: /* exceptions */
|
|
return a_s_ALEF_HAMZA_ABOVE;
|
|
case a_WAW_HAMZA: /* exceptions */
|
|
return a_s_WAW_HAMZA;
|
|
case a_ALEF_HAMZA_BELOW: /* exceptions */
|
|
return a_s_ALEF_HAMZA_BELOW;
|
|
case a_ALEF: /* exceptions */
|
|
return a_s_ALEF;
|
|
case a_TEH_MARBUTA: /* exceptions */
|
|
return a_s_TEH_MARBUTA;
|
|
case a_DAL: /* exceptions */
|
|
return a_s_DAL;
|
|
case a_THAL: /* exceptions */
|
|
return a_s_THAL;
|
|
case a_REH: /* exceptions */
|
|
return a_s_REH;
|
|
case a_ZAIN: /* exceptions */
|
|
return a_s_ZAIN;
|
|
case a_TATWEEL: /* exceptions */
|
|
return cur_c;
|
|
case a_WAW: /* exceptions */
|
|
return a_s_WAW;
|
|
case a_ALEF_MAKSURA: /* exceptions */
|
|
return a_s_ALEF_MAKSURA;
|
|
case a_BEH: return a_i_BEH;
|
|
case a_TEH: return a_i_TEH;
|
|
case a_THEH: return a_i_THEH;
|
|
case a_JEEM: return a_i_JEEM;
|
|
case a_HAH: return a_i_HAH;
|
|
case a_KHAH: return a_i_KHAH;
|
|
case a_SEEN: return a_i_SEEN;
|
|
case a_SHEEN: return a_i_SHEEN;
|
|
case a_SAD: return a_i_SAD;
|
|
case a_DAD: return a_i_DAD;
|
|
case a_TAH: return a_i_TAH;
|
|
case a_ZAH: return a_i_ZAH;
|
|
case a_AIN: return a_i_AIN;
|
|
case a_GHAIN: return a_i_GHAIN;
|
|
case a_FEH: return a_i_FEH;
|
|
case a_QAF: return a_i_QAF;
|
|
case a_KAF: return a_i_KAF;
|
|
case a_LAM: return a_i_LAM;
|
|
case a_MEEM: return a_i_MEEM;
|
|
case a_NOON: return a_i_NOON;
|
|
case a_HEH: return a_i_HEH;
|
|
case a_YEH: return a_i_YEH;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from ISO-8859-6/Isolated to Medial
|
|
*/
|
|
static int
|
|
chg_c_a2m(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
case a_HAMZA: return a_s_HAMZA; /* exception */
|
|
case a_ALEF_MADDA: return a_f_ALEF_MADDA; /* exception */
|
|
case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE; /* exception */
|
|
case a_WAW_HAMZA: return a_f_WAW_HAMZA; /* exception */
|
|
case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW; /* exception */
|
|
case a_YEH_HAMZA: return a_m_YEH_HAMZA;
|
|
case a_ALEF: return a_f_ALEF; /* exception */
|
|
case a_BEH: return a_m_BEH;
|
|
case a_TEH_MARBUTA: return a_f_TEH_MARBUTA; /* exception */
|
|
case a_TEH: return a_m_TEH;
|
|
case a_THEH: return a_m_THEH;
|
|
case a_JEEM: return a_m_JEEM;
|
|
case a_HAH: return a_m_HAH;
|
|
case a_KHAH: return a_m_KHAH;
|
|
case a_DAL: return a_f_DAL; /* exception */
|
|
case a_THAL: return a_f_THAL; /* exception */
|
|
case a_REH: return a_f_REH; /* exception */
|
|
case a_ZAIN: return a_f_ZAIN; /* exception */
|
|
case a_SEEN: return a_m_SEEN;
|
|
case a_SHEEN: return a_m_SHEEN;
|
|
case a_SAD: return a_m_SAD;
|
|
case a_DAD: return a_m_DAD;
|
|
case a_TAH: return a_m_TAH;
|
|
case a_ZAH: return a_m_ZAH;
|
|
case a_AIN: return a_m_AIN;
|
|
case a_GHAIN: return a_m_GHAIN;
|
|
case a_TATWEEL: return cur_c; /* exception */
|
|
case a_FEH: return a_m_FEH;
|
|
case a_QAF: return a_m_QAF;
|
|
case a_KAF: return a_m_KAF;
|
|
case a_LAM: return a_m_LAM;
|
|
case a_MEEM: return a_m_MEEM;
|
|
case a_NOON: return a_m_NOON;
|
|
case a_HEH: return a_m_HEH;
|
|
case a_WAW: return a_f_WAW; /* exception */
|
|
case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA; /* exception */
|
|
case a_YEH: return a_m_YEH;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from ISO-8859-6/Isolated to final
|
|
*/
|
|
static int
|
|
chg_c_a2f(int cur_c)
|
|
{
|
|
/* NOTE: these encodings need to be accounted for
|
|
* a_f_ALEF_MADDA;
|
|
* a_f_ALEF_HAMZA_ABOVE;
|
|
* a_f_ALEF_HAMZA_BELOW;
|
|
* a_f_LAM_ALEF_MADDA_ABOVE;
|
|
* a_f_LAM_ALEF_HAMZA_ABOVE;
|
|
* a_f_LAM_ALEF_HAMZA_BELOW;
|
|
*/
|
|
switch (cur_c)
|
|
{
|
|
case a_HAMZA: return a_s_HAMZA; /* exception */
|
|
case a_ALEF_MADDA: return a_f_ALEF_MADDA;
|
|
case a_ALEF_HAMZA_ABOVE: return a_f_ALEF_HAMZA_ABOVE;
|
|
case a_WAW_HAMZA: return a_f_WAW_HAMZA;
|
|
case a_ALEF_HAMZA_BELOW: return a_f_ALEF_HAMZA_BELOW;
|
|
case a_YEH_HAMZA: return a_f_YEH_HAMZA;
|
|
case a_ALEF: return a_f_ALEF;
|
|
case a_BEH: return a_f_BEH;
|
|
case a_TEH_MARBUTA: return a_f_TEH_MARBUTA;
|
|
case a_TEH: return a_f_TEH;
|
|
case a_THEH: return a_f_THEH;
|
|
case a_JEEM: return a_f_JEEM;
|
|
case a_HAH: return a_f_HAH;
|
|
case a_KHAH: return a_f_KHAH;
|
|
case a_DAL: return a_f_DAL;
|
|
case a_THAL: return a_f_THAL;
|
|
case a_REH: return a_f_REH;
|
|
case a_ZAIN: return a_f_ZAIN;
|
|
case a_SEEN: return a_f_SEEN;
|
|
case a_SHEEN: return a_f_SHEEN;
|
|
case a_SAD: return a_f_SAD;
|
|
case a_DAD: return a_f_DAD;
|
|
case a_TAH: return a_f_TAH;
|
|
case a_ZAH: return a_f_ZAH;
|
|
case a_AIN: return a_f_AIN;
|
|
case a_GHAIN: return a_f_GHAIN;
|
|
case a_TATWEEL: return cur_c; /* exception */
|
|
case a_FEH: return a_f_FEH;
|
|
case a_QAF: return a_f_QAF;
|
|
case a_KAF: return a_f_KAF;
|
|
case a_LAM: return a_f_LAM;
|
|
case a_MEEM: return a_f_MEEM;
|
|
case a_NOON: return a_f_NOON;
|
|
case a_HEH: return a_f_HEH;
|
|
case a_WAW: return a_f_WAW;
|
|
case a_ALEF_MAKSURA: return a_f_ALEF_MAKSURA;
|
|
case a_YEH: return a_f_YEH;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from Initial to Medial
|
|
*/
|
|
static int
|
|
chg_c_i2m(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
case a_i_YEH_HAMZA: return a_m_YEH_HAMZA;
|
|
case a_i_BEH: return a_m_BEH;
|
|
case a_i_TEH: return a_m_TEH;
|
|
case a_i_THEH: return a_m_THEH;
|
|
case a_i_JEEM: return a_m_JEEM;
|
|
case a_i_HAH: return a_m_HAH;
|
|
case a_i_KHAH: return a_m_KHAH;
|
|
case a_i_SEEN: return a_m_SEEN;
|
|
case a_i_SHEEN: return a_m_SHEEN;
|
|
case a_i_SAD: return a_m_SAD;
|
|
case a_i_DAD: return a_m_DAD;
|
|
case a_i_TAH: return a_m_TAH;
|
|
case a_i_ZAH: return a_m_ZAH;
|
|
case a_i_AIN: return a_m_AIN;
|
|
case a_i_GHAIN: return a_m_GHAIN;
|
|
case a_i_FEH: return a_m_FEH;
|
|
case a_i_QAF: return a_m_QAF;
|
|
case a_i_KAF: return a_m_KAF;
|
|
case a_i_LAM: return a_m_LAM;
|
|
case a_i_MEEM: return a_m_MEEM;
|
|
case a_i_NOON: return a_m_NOON;
|
|
case a_i_HEH: return a_m_HEH;
|
|
case a_i_YEH: return a_m_YEH;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from Final to Medial
|
|
*/
|
|
static int
|
|
chg_c_f2m(int cur_c)
|
|
{
|
|
switch (cur_c)
|
|
{
|
|
/* NOTE: these encodings are multi-positional, no ?
|
|
* case a_f_ALEF_MADDA:
|
|
* case a_f_ALEF_HAMZA_ABOVE:
|
|
* case a_f_ALEF_HAMZA_BELOW:
|
|
*/
|
|
case a_f_YEH_HAMZA: return a_m_YEH_HAMZA;
|
|
case a_f_WAW_HAMZA: /* exceptions */
|
|
case a_f_ALEF:
|
|
case a_f_TEH_MARBUTA:
|
|
case a_f_DAL:
|
|
case a_f_THAL:
|
|
case a_f_REH:
|
|
case a_f_ZAIN:
|
|
case a_f_WAW:
|
|
case a_f_ALEF_MAKSURA:
|
|
return cur_c;
|
|
case a_f_BEH: return a_m_BEH;
|
|
case a_f_TEH: return a_m_TEH;
|
|
case a_f_THEH: return a_m_THEH;
|
|
case a_f_JEEM: return a_m_JEEM;
|
|
case a_f_HAH: return a_m_HAH;
|
|
case a_f_KHAH: return a_m_KHAH;
|
|
case a_f_SEEN: return a_m_SEEN;
|
|
case a_f_SHEEN: return a_m_SHEEN;
|
|
case a_f_SAD: return a_m_SAD;
|
|
case a_f_DAD: return a_m_DAD;
|
|
case a_f_TAH: return a_m_TAH;
|
|
case a_f_ZAH: return a_m_ZAH;
|
|
case a_f_AIN: return a_m_AIN;
|
|
case a_f_GHAIN: return a_m_GHAIN;
|
|
case a_f_FEH: return a_m_FEH;
|
|
case a_f_QAF: return a_m_QAF;
|
|
case a_f_KAF: return a_m_KAF;
|
|
case a_f_LAM: return a_m_LAM;
|
|
case a_f_MEEM: return a_m_MEEM;
|
|
case a_f_NOON: return a_m_NOON;
|
|
case a_f_HEH: return a_m_HEH;
|
|
case a_f_YEH: return a_m_YEH;
|
|
|
|
/* NOTE: these encodings are multi-positional, no ?
|
|
* case a_f_LAM_ALEF_MADDA_ABOVE:
|
|
* case a_f_LAM_ALEF_HAMZA_ABOVE:
|
|
* case a_f_LAM_ALEF_HAMZA_BELOW:
|
|
* case a_f_LAM_ALEF:
|
|
*/
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from Combination (2 char) to an Isolated
|
|
*/
|
|
static int
|
|
chg_c_laa2i(int hid_c)
|
|
{
|
|
switch (hid_c)
|
|
{
|
|
case a_ALEF_MADDA: return a_s_LAM_ALEF_MADDA_ABOVE;
|
|
case a_ALEF_HAMZA_ABOVE: return a_s_LAM_ALEF_HAMZA_ABOVE;
|
|
case a_ALEF_HAMZA_BELOW: return a_s_LAM_ALEF_HAMZA_BELOW;
|
|
case a_ALEF: return a_s_LAM_ALEF;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Change shape - from Combination-Isolated to Final
|
|
*/
|
|
static int
|
|
chg_c_laa2f(int hid_c)
|
|
{
|
|
switch (hid_c)
|
|
{
|
|
case a_ALEF_MADDA: return a_f_LAM_ALEF_MADDA_ABOVE;
|
|
case a_ALEF_HAMZA_ABOVE: return a_f_LAM_ALEF_HAMZA_ABOVE;
|
|
case a_ALEF_HAMZA_BELOW: return a_f_LAM_ALEF_HAMZA_BELOW;
|
|
case a_ALEF: return a_f_LAM_ALEF;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Do "half-shaping" on character "c". Return zero if no shaping.
|
|
*/
|
|
static int
|
|
half_shape(int c)
|
|
{
|
|
if (A_is_a(c))
|
|
return chg_c_a2i(c);
|
|
if (A_is_valid(c) && A_is_f(c))
|
|
return chg_c_f2m(c);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Do Arabic shaping on character "c". Returns the shaped character.
|
|
* out: "ccp" points to the first byte of the character to be shaped.
|
|
* in/out: "c1p" points to the first composing char for "c".
|
|
* in: "prev_c" is the previous character (not shaped)
|
|
* in: "prev_c1" is the first composing char for the previous char
|
|
* (not shaped)
|
|
* in: "next_c" is the next character (not shaped).
|
|
*/
|
|
int
|
|
arabic_shape(
|
|
int c,
|
|
int *ccp,
|
|
int *c1p,
|
|
int prev_c,
|
|
int prev_c1,
|
|
int next_c)
|
|
{
|
|
int curr_c;
|
|
int shape_c;
|
|
int curr_laa;
|
|
int prev_laa;
|
|
|
|
/* Deal only with Arabic character, pass back all others */
|
|
if (!A_is_ok(c))
|
|
return c;
|
|
|
|
/* half-shape current and previous character */
|
|
shape_c = half_shape(prev_c);
|
|
|
|
/* Save away current character */
|
|
curr_c = c;
|
|
|
|
curr_laa = A_firstc_laa(c, *c1p);
|
|
prev_laa = A_firstc_laa(prev_c, prev_c1);
|
|
|
|
if (curr_laa)
|
|
{
|
|
if (A_is_valid(prev_c) && !A_is_f(shape_c)
|
|
&& !A_is_s(shape_c) && !prev_laa)
|
|
curr_c = chg_c_laa2f(curr_laa);
|
|
else
|
|
curr_c = chg_c_laa2i(curr_laa);
|
|
|
|
/* Remove the composing character */
|
|
*c1p = 0;
|
|
}
|
|
else if (!A_is_valid(prev_c) && A_is_valid(next_c))
|
|
curr_c = chg_c_a2i(c);
|
|
else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa)
|
|
curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c);
|
|
else if (A_is_valid(next_c))
|
|
curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c);
|
|
else if (A_is_valid(prev_c))
|
|
curr_c = chg_c_a2f(c);
|
|
else
|
|
curr_c = chg_c_a2s(c);
|
|
|
|
/* Sanity check -- curr_c should, in the future, never be 0.
|
|
* We should, in the future, insert a fatal error here. */
|
|
if (curr_c == NUL)
|
|
curr_c = c;
|
|
|
|
if (curr_c != c && ccp != NULL)
|
|
{
|
|
char_u buf[MB_MAXBYTES + 1];
|
|
|
|
/* Update the first byte of the character. */
|
|
(*mb_char2bytes)(curr_c, buf);
|
|
*ccp = buf[0];
|
|
}
|
|
|
|
/* Return the shaped character */
|
|
return curr_c;
|
|
}
|
|
|
|
|
|
/*
|
|
* A_firstc_laa returns first character of LAA combination if it exists
|
|
*/
|
|
static int
|
|
A_firstc_laa(
|
|
int c, /* base character */
|
|
int c1) /* first composing character */
|
|
{
|
|
if (c1 != NUL && c == a_LAM && !A_is_harakat(c1))
|
|
return c1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* A_is_harakat returns TRUE if 'c' is an Arabic Harakat character
|
|
* (harakat/tanween)
|
|
*/
|
|
static int
|
|
A_is_harakat(int c)
|
|
{
|
|
return (c >= a_FATHATAN && c <= a_SUKUN);
|
|
}
|
|
|
|
|
|
/*
|
|
* A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character
|
|
* (alphabet/number/punctuation)
|
|
*/
|
|
static int
|
|
A_is_iso(int c)
|
|
{
|
|
return ((c >= a_HAMZA && c <= a_GHAIN)
|
|
|| (c >= a_TATWEEL && c <= a_HAMZA_BELOW)
|
|
|| c == a_MINI_ALEF);
|
|
}
|
|
|
|
|
|
/*
|
|
* A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character
|
|
* (alphabet/number/punctuation)
|
|
*/
|
|
static int
|
|
A_is_formb(int c)
|
|
{
|
|
return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN)
|
|
|| c == a_s_KASRATAN
|
|
|| (c >= a_s_FATHA && c <= a_f_LAM_ALEF)
|
|
|| c == a_BYTE_ORDER_MARK);
|
|
}
|
|
|
|
|
|
/*
|
|
* A_is_ok returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B)
|
|
*/
|
|
static int
|
|
A_is_ok(int c)
|
|
{
|
|
return (A_is_iso(c) || A_is_formb(c));
|
|
}
|
|
|
|
|
|
/*
|
|
* A_is_valid returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B)
|
|
* with some exceptions/exclusions
|
|
*/
|
|
static int
|
|
A_is_valid(int c)
|
|
{
|
|
return (A_is_ok(c) && !A_is_special(c));
|
|
}
|
|
|
|
|
|
/*
|
|
* A_is_special returns TRUE if 'c' is not a special Arabic character.
|
|
* Specials don't adhere to most of the rules.
|
|
*/
|
|
static int
|
|
A_is_special(int c)
|
|
{
|
|
return (c == a_HAMZA || c == a_s_HAMZA);
|
|
}
|
|
|
|
#endif /* FEAT_ARABIC */
|