diff options
Diffstat (limited to 'third_party/harfbuzz/src/harfbuzz-arabic.c')
-rw-r--r-- | third_party/harfbuzz/src/harfbuzz-arabic.c | 1090 |
1 files changed, 1090 insertions, 0 deletions
diff --git a/third_party/harfbuzz/src/harfbuzz-arabic.c b/third_party/harfbuzz/src/harfbuzz-arabic.c new file mode 100644 index 0000000..0609232 --- /dev/null +++ b/third_party/harfbuzz/src/harfbuzz-arabic.c @@ -0,0 +1,1090 @@ +/* + * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) + * + * This is part of HarfBuzz, an OpenType Layout engine library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "harfbuzz-shaper.h" +#include "harfbuzz-shaper-private.h" + +#include <assert.h> + +static const HB_UChar16 ReplacementCharacter = 0xfffd; + +typedef struct { + unsigned char shape; + unsigned char justification; +} HB_ArabicProperties; + +typedef enum { + XIsolated, + XFinal, + XInitial, + XMedial, + /* intermediate state */ + XCausing +} ArabicShape; + +/* +// these groups correspond to the groups defined in the Unicode standard. +// Some of these groups are equal with regards to both joining and line breaking behaviour, +// and thus have the same enum value +// +// I'm not sure the mapping of syriac to arabic enums is correct with regards to justification, but as +// I couldn't find any better document I'll hope for the best. +*/ +typedef enum { + /* NonJoining */ + ArabicNone, + ArabicSpace, + /* Transparent */ + Transparent, + /* Causing */ + Center, + Kashida, + + /* Arabic */ + /* Dual */ + Beh, + Noon, + Meem = Noon, + Heh = Noon, + KnottedHeh = Noon, + HehGoal = Noon, + SwashKaf = Noon, + Yeh, + Hah, + Seen, + Sad = Seen, + Tah, + Kaf = Tah, + Gaf = Tah, + Lam = Tah, + Ain, + Feh = Ain, + Qaf = Ain, + /* Right */ + Alef, + Waw, + Dal, + TehMarbuta = Dal, + Reh, + HamzaOnHehGoal, + YehWithTail = HamzaOnHehGoal, + YehBarre = HamzaOnHehGoal, + + /* Syriac */ + /* Dual */ + Beth = Beh, + Gamal = Ain, + Heth = Noon, + Teth = Hah, + Yudh = Noon, + Kaph = Noon, + Lamadh = Lam, + Mim = Noon, + Nun = Noon, + Semakh = Noon, + FinalSemakh = Noon, + SyriacE = Ain, + Pe = Ain, + ReversedPe = Hah, + Qaph = Noon, + Shin = Noon, + Fe = Ain, + + /* Right */ + Alaph = Alef, + Dalath = Dal, + He = Dal, + SyriacWaw = Waw, + Zain = Alef, + YudhHe = Waw, + Sadhe = HamzaOnHehGoal, + Taw = Dal, + + /* Compiler bug? Otherwise ArabicGroupsEnd would be equal to Dal + 1. */ + Dummy = HamzaOnHehGoal, + ArabicGroupsEnd +} ArabicGroup; + +static const unsigned char arabic_group[0x150] = { + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + ArabicNone, ArabicNone, Alef, Alef, + Waw, Alef, Yeh, Alef, + Beh, TehMarbuta, Beh, Beh, + Hah, Hah, Hah, Dal, + + Dal, Reh, Reh, Seen, + Seen, Sad, Sad, Tah, + Tah, Ain, Ain, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + /* 0x640 */ + Kashida, Feh, Qaf, Kaf, + Lam, Meem, Noon, Heh, + Waw, Yeh, Yeh, Transparent, + Transparent, Transparent, Transparent, Transparent, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, Beh, Qaf, + + Transparent, Alef, Alef, Alef, + ArabicNone, Alef, Waw, Waw, + Yeh, Beh, Beh, Beh, + Beh, Beh, Beh, Beh, + + /* 0x680 */ + Beh, Hah, Hah, Hah, + Hah, Hah, Hah, Hah, + Dal, Dal, Dal, Dal, + Dal, Dal, Dal, Dal, + + Dal, Reh, Reh, Reh, + Reh, Reh, Reh, Reh, + Reh, Reh, Seen, Seen, + Seen, Sad, Sad, Tah, + + Ain, Feh, Feh, Feh, + Feh, Feh, Feh, Qaf, + Qaf, Gaf, SwashKaf, Gaf, + Kaf, Kaf, Kaf, Gaf, + + Gaf, Gaf, Gaf, Gaf, + Gaf, Lam, Lam, Lam, + Lam, Noon, Noon, Noon, + Noon, Noon, KnottedHeh, Hah, + + /* 0x6c0 */ + TehMarbuta, HehGoal, HamzaOnHehGoal, HamzaOnHehGoal, + Waw, Waw, Waw, Waw, + Waw, Waw, Waw, Waw, + Yeh, YehWithTail, Yeh, Waw, + + Yeh, Yeh, YehBarre, YehBarre, + ArabicNone, TehMarbuta, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, ArabicNone, ArabicNone, Transparent, + + Transparent, Transparent, Transparent, Transparent, + Transparent, ArabicNone, ArabicNone, Transparent, + Transparent, ArabicNone, Transparent, Transparent, + Transparent, Transparent, Dal, Reh, + + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, Seen, Sad, + Ain, ArabicNone, ArabicNone, KnottedHeh, + + /* 0x700 */ + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + ArabicNone, ArabicNone, ArabicNone, ArabicNone, + + Alaph, Transparent, Beth, Gamal, + Gamal, Dalath, Dalath, He, + SyriacWaw, Zain, Heth, Teth, + Teth, Yudh, YudhHe, Kaph, + + Lamadh, Mim, Nun, Semakh, + FinalSemakh, SyriacE, Pe, ReversedPe, + Sadhe, Qaph, Dalath, Shin, + Taw, Beth, Gamal, Dalath, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, Transparent, + Transparent, Transparent, Transparent, ArabicNone, + ArabicNone, Zain, Kaph, Fe, +}; + +static ArabicGroup arabicGroup(unsigned short uc) +{ + if (uc >= 0x0600 && uc < 0x750) + return (ArabicGroup) arabic_group[uc-0x600]; + else if (uc == 0x200d) + return Center; + else if (HB_GetUnicodeCharCategory(uc) == HB_Separator_Space) + return ArabicSpace; + else + return ArabicNone; +} + + +/* + Arabic shaping obeys a number of rules according to the joining classes (see Unicode book, section on + arabic). + + Each unicode char has a joining class (right, dual (left&right), center (joincausing) or transparent). + transparent joining is not encoded in HB_UChar16::joining(), but applies to all combining marks and format marks. + + Right join-causing: dual + center + Left join-causing: dual + right + center + + Rules are as follows (for a string already in visual order, as we have it here): + + R1 Transparent characters do not affect joining behaviour. + R2 A right joining character, that has a right join-causing char on the right will get form XRight + (R3 A left joining character, that has a left join-causing char on the left will get form XLeft) + Note: the above rule is meaningless, as there are no pure left joining characters defined in Unicode + R4 A dual joining character, that has a left join-causing char on the left and a right join-causing char on + the right will get form XMedial + R5 A dual joining character, that has a right join causing char on the right, and no left join causing char on the left + will get form XRight + R6 A dual joining character, that has a left join causing char on the left, and no right join causing char on the right + will get form XLeft + R7 Otherwise the character will get form XIsolated + + Additionally we have to do the minimal ligature support for lam-alef ligatures: + + L1 Transparent characters do not affect ligature behaviour. + L2 Any sequence of Alef(XRight) + Lam(XMedial) will form the ligature Alef.Lam(XLeft) + L3 Any sequence of Alef(XRight) + Lam(XLeft) will form the ligature Alef.Lam(XIsolated) + + The state table below handles rules R1-R7. +*/ + +typedef enum { + JNone, + JCausing, + JDual, + JRight, + JTransparent +} Joining; + +static const Joining joining_for_group[ArabicGroupsEnd] = { + /* NonJoining */ + JNone, /* ArabicNone */ + JNone, /* ArabicSpace */ + /* Transparent */ + JTransparent, /* Transparent */ + /* Causing */ + JCausing, /* Center */ + JCausing, /* Kashida */ + /* Dual */ + JDual, /* Beh */ + JDual, /* Noon */ + JDual, /* Yeh */ + JDual, /* Hah */ + JDual, /* Seen */ + JDual, /* Tah */ + JDual, /* Ain */ + /* Right */ + JRight, /* Alef */ + JRight, /* Waw */ + JRight, /* Dal */ + JRight, /* Reh */ + JRight /* HamzaOnHehGoal */ +}; + + +typedef struct { + ArabicShape form1; + ArabicShape form2; +} JoiningPair; + +static const JoiningPair joining_table[5][4] = +/* None, Causing, Dual, Right */ +{ + { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XInitial }, { XIsolated, XIsolated } }, /* XIsolated */ + { { XFinal, XIsolated }, { XFinal, XCausing }, { XFinal, XInitial }, { XFinal, XIsolated } }, /* XFinal */ + { { XIsolated, XIsolated }, { XInitial, XCausing }, { XInitial, XMedial }, { XInitial, XFinal } }, /* XInitial */ + { { XFinal, XIsolated }, { XMedial, XCausing }, { XMedial, XMedial }, { XMedial, XFinal } }, /* XMedial */ + { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XMedial }, { XIsolated, XFinal } }, /* XCausing */ +}; + + +/* +According to http://www.microsoft.com/middleeast/Arabicdev/IE6/KBase.asp + +1. Find the priority of the connecting opportunities in each word +2. Add expansion at the highest priority connection opportunity +3. If more than one connection opportunity have the same highest value, + use the opportunity closest to the end of the word. + +Following is a chart that provides the priority for connection +opportunities and where expansion occurs. The character group names +are those in table 6.6 of the UNICODE 2.0 book. + + +PrioritY Glyph Condition Kashida Location + +Arabic_Kashida User inserted Kashida The user entered a Kashida in a position. After the user + (Shift+j or Shift+[E with hat]) Thus, it is the highest priority to insert an inserted kashida + automatic kashida. + +Arabic_Seen Seen, Sad Connecting to the next character. After the character. + (Initial or medial form). + +Arabic_HaaDal Teh Marbutah, Haa, Dal Connecting to previous character. Before the final form + of these characters. + +Arabic_Alef Alef, Tah, Lam, Connecting to previous character. Before the final form + Kaf and Gaf of these characters. + +Arabic_BaRa Reh, Yeh Connected to medial Beh Before preceding medial Baa + +Arabic_Waw Waw, Ain, Qaf, Feh Connecting to previous character. Before the final form of + these characters. + +Arabic_Normal Other connecting Connecting to previous character. Before the final form + characters of these characters. + + + +This seems to imply that we have at most one kashida point per arabic word. + +*/ + +static void getArabicProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties) +{ +/* qDebug("arabicSyriacOpenTypeShape: properties:"); */ + int lastPos = 0; + int lastGroup = ArabicNone; + int i = 0; + + ArabicGroup group = arabicGroup(chars[0]); + Joining j = joining_for_group[group]; + ArabicShape shape = joining_table[XIsolated][j].form2; + properties[0].justification = HB_NoJustification; + + for (i = 1; i < len; ++i) { + /* #### fix handling for spaces and punktuation */ + properties[i].justification = HB_NoJustification; + + group = arabicGroup(chars[i]); + j = joining_for_group[group]; + + if (j == JTransparent) { + properties[i].shape = XIsolated; + continue; + } + + properties[lastPos].shape = joining_table[shape][j].form1; + shape = joining_table[shape][j].form2; + + switch(lastGroup) { + case Seen: + if (properties[lastPos].shape == XInitial || properties[lastPos].shape == XMedial) + properties[i-1].justification = HB_Arabic_Seen; + break; + case Hah: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = HB_Arabic_HaaDal; + break; + case Alef: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = HB_Arabic_Alef; + break; + case Ain: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = HB_Arabic_Waw; + break; + case Noon: + if (properties[lastPos].shape == XFinal) + properties[lastPos-1].justification = HB_Arabic_Normal; + break; + case ArabicNone: + break; + + default: + assert(FALSE); + } + + lastGroup = ArabicNone; + + switch(group) { + case ArabicNone: + case Transparent: + /* ### Center should probably be treated as transparent when it comes to justification. */ + case Center: + break; + case ArabicSpace: + properties[i].justification = HB_Arabic_Space; + break; + case Kashida: + properties[i].justification = HB_Arabic_Kashida; + break; + case Seen: + lastGroup = Seen; + break; + + case Hah: + case Dal: + lastGroup = Hah; + break; + + case Alef: + case Tah: + lastGroup = Alef; + break; + + case Yeh: + case Reh: + if (properties[lastPos].shape == XMedial && arabicGroup(chars[lastPos]) == Beh) + properties[lastPos-1].justification = HB_Arabic_BaRa; + break; + + case Ain: + case Waw: + lastGroup = Ain; + break; + + case Noon: + case Beh: + case HamzaOnHehGoal: + lastGroup = Noon; + break; + case ArabicGroupsEnd: + assert(FALSE); + } + + lastPos = i; + } + properties[lastPos].shape = joining_table[shape][JNone].form1; + + + /* + for (int i = 0; i < len; ++i) + qDebug("arabic properties(%d): uc=%x shape=%d, justification=%d", i, chars[i], properties[i].shape, properties[i].justification); + */ +} + +/* +// The unicode to unicode shaping codec. +// does only presentation forms B at the moment, but that should be enough for +// simple display +*/ +static const hb_uint16 arabicUnicodeMapping[256][2] = { + /* base of shaped forms, and number-1 of them (0 for non shaping, + 1 for right binding and 3 for dual binding */ + + /* These are just the glyphs available in Unicode, + some characters are in R class, but have no glyphs in Unicode. */ + + { 0x0600, 0 }, /* 0x0600 */ + { 0x0601, 0 }, /* 0x0601 */ + { 0x0602, 0 }, /* 0x0602 */ + { 0x0603, 0 }, /* 0x0603 */ + { 0x0604, 0 }, /* 0x0604 */ + { 0x0605, 0 }, /* 0x0605 */ + { 0x0606, 0 }, /* 0x0606 */ + { 0x0607, 0 }, /* 0x0607 */ + { 0x0608, 0 }, /* 0x0608 */ + { 0x0609, 0 }, /* 0x0609 */ + { 0x060A, 0 }, /* 0x060A */ + { 0x060B, 0 }, /* 0x060B */ + { 0x060C, 0 }, /* 0x060C */ + { 0x060D, 0 }, /* 0x060D */ + { 0x060E, 0 }, /* 0x060E */ + { 0x060F, 0 }, /* 0x060F */ + + { 0x0610, 0 }, /* 0x0610 */ + { 0x0611, 0 }, /* 0x0611 */ + { 0x0612, 0 }, /* 0x0612 */ + { 0x0613, 0 }, /* 0x0613 */ + { 0x0614, 0 }, /* 0x0614 */ + { 0x0615, 0 }, /* 0x0615 */ + { 0x0616, 0 }, /* 0x0616 */ + { 0x0617, 0 }, /* 0x0617 */ + { 0x0618, 0 }, /* 0x0618 */ + { 0x0619, 0 }, /* 0x0619 */ + { 0x061A, 0 }, /* 0x061A */ + { 0x061B, 0 }, /* 0x061B */ + { 0x061C, 0 }, /* 0x061C */ + { 0x061D, 0 }, /* 0x061D */ + { 0x061E, 0 }, /* 0x061E */ + { 0x061F, 0 }, /* 0x061F */ + + { 0x0620, 0 }, /* 0x0620 */ + { 0xFE80, 0 }, /* 0x0621 HAMZA */ + { 0xFE81, 1 }, /* 0x0622 R ALEF WITH MADDA ABOVE */ + { 0xFE83, 1 }, /* 0x0623 R ALEF WITH HAMZA ABOVE */ + { 0xFE85, 1 }, /* 0x0624 R WAW WITH HAMZA ABOVE */ + { 0xFE87, 1 }, /* 0x0625 R ALEF WITH HAMZA BELOW */ + { 0xFE89, 3 }, /* 0x0626 D YEH WITH HAMZA ABOVE */ + { 0xFE8D, 1 }, /* 0x0627 R ALEF */ + { 0xFE8F, 3 }, /* 0x0628 D BEH */ + { 0xFE93, 1 }, /* 0x0629 R TEH MARBUTA */ + { 0xFE95, 3 }, /* 0x062A D TEH */ + { 0xFE99, 3 }, /* 0x062B D THEH */ + { 0xFE9D, 3 }, /* 0x062C D JEEM */ + { 0xFEA1, 3 }, /* 0x062D D HAH */ + { 0xFEA5, 3 }, /* 0x062E D KHAH */ + { 0xFEA9, 1 }, /* 0x062F R DAL */ + + { 0xFEAB, 1 }, /* 0x0630 R THAL */ + { 0xFEAD, 1 }, /* 0x0631 R REH */ + { 0xFEAF, 1 }, /* 0x0632 R ZAIN */ + { 0xFEB1, 3 }, /* 0x0633 D SEEN */ + { 0xFEB5, 3 }, /* 0x0634 D SHEEN */ + { 0xFEB9, 3 }, /* 0x0635 D SAD */ + { 0xFEBD, 3 }, /* 0x0636 D DAD */ + { 0xFEC1, 3 }, /* 0x0637 D TAH */ + { 0xFEC5, 3 }, /* 0x0638 D ZAH */ + { 0xFEC9, 3 }, /* 0x0639 D AIN */ + { 0xFECD, 3 }, /* 0x063A D GHAIN */ + { 0x063B, 0 }, /* 0x063B */ + { 0x063C, 0 }, /* 0x063C */ + { 0x063D, 0 }, /* 0x063D */ + { 0x063E, 0 }, /* 0x063E */ + { 0x063F, 0 }, /* 0x063F */ + + { 0x0640, 0 }, /* 0x0640 C TATWEEL // ### Join Causing, only one glyph */ + { 0xFED1, 3 }, /* 0x0641 D FEH */ + { 0xFED5, 3 }, /* 0x0642 D QAF */ + { 0xFED9, 3 }, /* 0x0643 D KAF */ + { 0xFEDD, 3 }, /* 0x0644 D LAM */ + { 0xFEE1, 3 }, /* 0x0645 D MEEM */ + { 0xFEE5, 3 }, /* 0x0646 D NOON */ + { 0xFEE9, 3 }, /* 0x0647 D HEH */ + { 0xFEED, 1 }, /* 0x0648 R WAW */ + { 0x0649, 3 }, /* 0x0649 ALEF MAKSURA // ### Dual, glyphs not consecutive, handle in code. */ + { 0xFEF1, 3 }, /* 0x064A D YEH */ + { 0x064B, 0 }, /* 0x064B */ + { 0x064C, 0 }, /* 0x064C */ + { 0x064D, 0 }, /* 0x064D */ + { 0x064E, 0 }, /* 0x064E */ + { 0x064F, 0 }, /* 0x064F */ + + { 0x0650, 0 }, /* 0x0650 */ + { 0x0651, 0 }, /* 0x0651 */ + { 0x0652, 0 }, /* 0x0652 */ + { 0x0653, 0 }, /* 0x0653 */ + { 0x0654, 0 }, /* 0x0654 */ + { 0x0655, 0 }, /* 0x0655 */ + { 0x0656, 0 }, /* 0x0656 */ + { 0x0657, 0 }, /* 0x0657 */ + { 0x0658, 0 }, /* 0x0658 */ + { 0x0659, 0 }, /* 0x0659 */ + { 0x065A, 0 }, /* 0x065A */ + { 0x065B, 0 }, /* 0x065B */ + { 0x065C, 0 }, /* 0x065C */ + { 0x065D, 0 }, /* 0x065D */ + { 0x065E, 0 }, /* 0x065E */ + { 0x065F, 0 }, /* 0x065F */ + + { 0x0660, 0 }, /* 0x0660 */ + { 0x0661, 0 }, /* 0x0661 */ + { 0x0662, 0 }, /* 0x0662 */ + { 0x0663, 0 }, /* 0x0663 */ + { 0x0664, 0 }, /* 0x0664 */ + { 0x0665, 0 }, /* 0x0665 */ + { 0x0666, 0 }, /* 0x0666 */ + { 0x0667, 0 }, /* 0x0667 */ + { 0x0668, 0 }, /* 0x0668 */ + { 0x0669, 0 }, /* 0x0669 */ + { 0x066A, 0 }, /* 0x066A */ + { 0x066B, 0 }, /* 0x066B */ + { 0x066C, 0 }, /* 0x066C */ + { 0x066D, 0 }, /* 0x066D */ + { 0x066E, 0 }, /* 0x066E */ + { 0x066F, 0 }, /* 0x066F */ + + { 0x0670, 0 }, /* 0x0670 */ + { 0xFB50, 1 }, /* 0x0671 R ALEF WASLA */ + { 0x0672, 0 }, /* 0x0672 */ + { 0x0673, 0 }, /* 0x0673 */ + { 0x0674, 0 }, /* 0x0674 */ + { 0x0675, 0 }, /* 0x0675 */ + { 0x0676, 0 }, /* 0x0676 */ + { 0x0677, 0 }, /* 0x0677 */ + { 0x0678, 0 }, /* 0x0678 */ + { 0xFB66, 3 }, /* 0x0679 D TTEH */ + { 0xFB5E, 3 }, /* 0x067A D TTEHEH */ + { 0xFB52, 3 }, /* 0x067B D BEEH */ + { 0x067C, 0 }, /* 0x067C */ + { 0x067D, 0 }, /* 0x067D */ + { 0xFB56, 3 }, /* 0x067E D PEH */ + { 0xFB62, 3 }, /* 0x067F D TEHEH */ + + { 0xFB5A, 3 }, /* 0x0680 D BEHEH */ + { 0x0681, 0 }, /* 0x0681 */ + { 0x0682, 0 }, /* 0x0682 */ + { 0xFB76, 3 }, /* 0x0683 D NYEH */ + { 0xFB72, 3 }, /* 0x0684 D DYEH */ + { 0x0685, 0 }, /* 0x0685 */ + { 0xFB7A, 3 }, /* 0x0686 D TCHEH */ + { 0xFB7E, 3 }, /* 0x0687 D TCHEHEH */ + { 0xFB88, 1 }, /* 0x0688 R DDAL */ + { 0x0689, 0 }, /* 0x0689 */ + { 0x068A, 0 }, /* 0x068A */ + { 0x068B, 0 }, /* 0x068B */ + { 0xFB84, 1 }, /* 0x068C R DAHAL */ + { 0xFB82, 1 }, /* 0x068D R DDAHAL */ + { 0xFB86, 1 }, /* 0x068E R DUL */ + { 0x068F, 0 }, /* 0x068F */ + + { 0x0690, 0 }, /* 0x0690 */ + { 0xFB8C, 1 }, /* 0x0691 R RREH */ + { 0x0692, 0 }, /* 0x0692 */ + { 0x0693, 0 }, /* 0x0693 */ + { 0x0694, 0 }, /* 0x0694 */ + { 0x0695, 0 }, /* 0x0695 */ + { 0x0696, 0 }, /* 0x0696 */ + { 0x0697, 0 }, /* 0x0697 */ + { 0xFB8A, 1 }, /* 0x0698 R JEH */ + { 0x0699, 0 }, /* 0x0699 */ + { 0x069A, 0 }, /* 0x069A */ + { 0x069B, 0 }, /* 0x069B */ + { 0x069C, 0 }, /* 0x069C */ + { 0x069D, 0 }, /* 0x069D */ + { 0x069E, 0 }, /* 0x069E */ + { 0x069F, 0 }, /* 0x069F */ + + { 0x06A0, 0 }, /* 0x06A0 */ + { 0x06A1, 0 }, /* 0x06A1 */ + { 0x06A2, 0 }, /* 0x06A2 */ + { 0x06A3, 0 }, /* 0x06A3 */ + { 0xFB6A, 3 }, /* 0x06A4 D VEH */ + { 0x06A5, 0 }, /* 0x06A5 */ + { 0xFB6E, 3 }, /* 0x06A6 D PEHEH */ + { 0x06A7, 0 }, /* 0x06A7 */ + { 0x06A8, 0 }, /* 0x06A8 */ + { 0xFB8E, 3 }, /* 0x06A9 D KEHEH */ + { 0x06AA, 0 }, /* 0x06AA */ + { 0x06AB, 0 }, /* 0x06AB */ + { 0x06AC, 0 }, /* 0x06AC */ + { 0xFBD3, 3 }, /* 0x06AD D NG */ + { 0x06AE, 0 }, /* 0x06AE */ + { 0xFB92, 3 }, /* 0x06AF D GAF */ + + { 0x06B0, 0 }, /* 0x06B0 */ + { 0xFB9A, 3 }, /* 0x06B1 D NGOEH */ + { 0x06B2, 0 }, /* 0x06B2 */ + { 0xFB96, 3 }, /* 0x06B3 D GUEH */ + { 0x06B4, 0 }, /* 0x06B4 */ + { 0x06B5, 0 }, /* 0x06B5 */ + { 0x06B6, 0 }, /* 0x06B6 */ + { 0x06B7, 0 }, /* 0x06B7 */ + { 0x06B8, 0 }, /* 0x06B8 */ + { 0x06B9, 0 }, /* 0x06B9 */ + { 0xFB9E, 1 }, /* 0x06BA R NOON GHUNNA */ + { 0xFBA0, 3 }, /* 0x06BB D RNOON */ + { 0x06BC, 0 }, /* 0x06BC */ + { 0x06BD, 0 }, /* 0x06BD */ + { 0xFBAA, 3 }, /* 0x06BE D HEH DOACHASHMEE */ + { 0x06BF, 0 }, /* 0x06BF */ + + { 0xFBA4, 1 }, /* 0x06C0 R HEH WITH YEH ABOVE */ + { 0xFBA6, 3 }, /* 0x06C1 D HEH GOAL */ + { 0x06C2, 0 }, /* 0x06C2 */ + { 0x06C3, 0 }, /* 0x06C3 */ + { 0x06C4, 0 }, /* 0x06C4 */ + { 0xFBE0, 1 }, /* 0x06C5 R KIRGHIZ OE */ + { 0xFBD9, 1 }, /* 0x06C6 R OE */ + { 0xFBD7, 1 }, /* 0x06C7 R U */ + { 0xFBDB, 1 }, /* 0x06C8 R YU */ + { 0xFBE2, 1 }, /* 0x06C9 R KIRGHIZ YU */ + { 0x06CA, 0 }, /* 0x06CA */ + { 0xFBDE, 1 }, /* 0x06CB R VE */ + { 0xFBFC, 3 }, /* 0x06CC D FARSI YEH */ + { 0x06CD, 0 }, /* 0x06CD */ + { 0x06CE, 0 }, /* 0x06CE */ + { 0x06CF, 0 }, /* 0x06CF */ + + { 0xFBE4, 3 }, /* 0x06D0 D E */ + { 0x06D1, 0 }, /* 0x06D1 */ + { 0xFBAE, 1 }, /* 0x06D2 R YEH BARREE */ + { 0xFBB0, 1 }, /* 0x06D3 R YEH BARREE WITH HAMZA ABOVE */ + { 0x06D4, 0 }, /* 0x06D4 */ + { 0x06D5, 0 }, /* 0x06D5 */ + { 0x06D6, 0 }, /* 0x06D6 */ + { 0x06D7, 0 }, /* 0x06D7 */ + { 0x06D8, 0 }, /* 0x06D8 */ + { 0x06D9, 0 }, /* 0x06D9 */ + { 0x06DA, 0 }, /* 0x06DA */ + { 0x06DB, 0 }, /* 0x06DB */ + { 0x06DC, 0 }, /* 0x06DC */ + { 0x06DD, 0 }, /* 0x06DD */ + { 0x06DE, 0 }, /* 0x06DE */ + { 0x06DF, 0 }, /* 0x06DF */ + + { 0x06E0, 0 }, /* 0x06E0 */ + { 0x06E1, 0 }, /* 0x06E1 */ + { 0x06E2, 0 }, /* 0x06E2 */ + { 0x06E3, 0 }, /* 0x06E3 */ + { 0x06E4, 0 }, /* 0x06E4 */ + { 0x06E5, 0 }, /* 0x06E5 */ + { 0x06E6, 0 }, /* 0x06E6 */ + { 0x06E7, 0 }, /* 0x06E7 */ + { 0x06E8, 0 }, /* 0x06E8 */ + { 0x06E9, 0 }, /* 0x06E9 */ + { 0x06EA, 0 }, /* 0x06EA */ + { 0x06EB, 0 }, /* 0x06EB */ + { 0x06EC, 0 }, /* 0x06EC */ + { 0x06ED, 0 }, /* 0x06ED */ + { 0x06EE, 0 }, /* 0x06EE */ + { 0x06EF, 0 }, /* 0x06EF */ + + { 0x06F0, 0 }, /* 0x06F0 */ + { 0x06F1, 0 }, /* 0x06F1 */ + { 0x06F2, 0 }, /* 0x06F2 */ + { 0x06F3, 0 }, /* 0x06F3 */ + { 0x06F4, 0 }, /* 0x06F4 */ + { 0x06F5, 0 }, /* 0x06F5 */ + { 0x06F6, 0 }, /* 0x06F6 */ + { 0x06F7, 0 }, /* 0x06F7 */ + { 0x06F8, 0 }, /* 0x06F8 */ + { 0x06F9, 0 }, /* 0x06F9 */ + { 0x06FA, 0 }, /* 0x06FA */ + { 0x06FB, 0 }, /* 0x06FB */ + { 0x06FC, 0 }, /* 0x06FC */ + { 0x06FD, 0 }, /* 0x06FD */ + { 0x06FE, 0 }, /* 0x06FE */ + { 0x06FF, 0 } /* 0x06FF */ +}; + +/* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, this table does */ +static const hb_uint16 alefMaksura[4] = {0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}; + +/* +// this is a bit tricky. Alef always binds to the right, so the second parameter descibing the shape +// of the lam can be either initial of medial. So initial maps to the isolated form of the ligature, +// medial to the final form +*/ +static const hb_uint16 arabicUnicodeLamAlefMapping[6][4] = { + { 0xfffd, 0xfffd, 0xfef5, 0xfef6 }, /* 0x622 R Alef with Madda above */ + { 0xfffd, 0xfffd, 0xfef7, 0xfef8 }, /* 0x623 R Alef with Hamza above */ + { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x624 // Just to fill the table ;-) */ + { 0xfffd, 0xfffd, 0xfef9, 0xfefa }, /* 0x625 R Alef with Hamza below */ + { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x626 // Just to fill the table ;-) */ + { 0xfffd, 0xfffd, 0xfefb, 0xfefc } /* 0x627 R Alef */ +}; + +static int getShape(hb_uint8 cell, int shape) +{ + /* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, handle this here */ + int ch = (cell != 0x49) + ? (shape ? arabicUnicodeMapping[cell][0] + shape : 0x600+cell) + : alefMaksura[shape] ; + return ch; +} + + +/* + Two small helper functions for arabic shaping. +*/ +static HB_UChar16 prevChar(const HB_UChar16 *str, int pos) +{ + /*qDebug("leftChar: pos=%d", pos); */ + const HB_UChar16 *ch = str + pos - 1; + pos--; + while(pos > -1) { + if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing) + return *ch; + pos--; + ch--; + } + return ReplacementCharacter; +} + +static HB_UChar16 nextChar(const HB_UChar16 *str, hb_uint32 len, hb_uint32 pos) +{ + const HB_UChar16 *ch = str + pos + 1; + pos++; + while(pos < len) { + /*qDebug("rightChar: %d isLetter=%d, joining=%d", pos, ch.isLetter(), ch.joining()); */ + if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing) + return *ch; + /* assume it's a transparent char, this might not be 100% correct */ + pos++; + ch++; + } + return ReplacementCharacter; +} + +static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32 from, hb_uint32 len, HB_UChar16 *shapeBuffer, int *shapedLength, + HB_Bool reverse, HB_GlyphAttributes *attributes, unsigned short *logClusters) +{ + HB_ArabicProperties *properties; + hb_int32 f = from; + hb_uint32 l = len; + const HB_UChar16 *ch; + HB_UChar16 *data; + int clusterStart; + hb_uint32 i; + HB_STACKARRAY(HB_ArabicProperties, props, len + 2); + properties = props; + + assert(stringLength >= from + len); + + if(len == 0) { + *shapedLength = 0; + return; + } + + if (from > 0) { + --f; + ++l; + ++properties; + } + if (f + l < stringLength) + ++l; + getArabicProperties(uc+f, l, props); + + ch = uc + from; + data = shapeBuffer; + clusterStart = 0; + + for (i = 0; i < len; i++) { + hb_uint8 r = *ch >> 8; + int gpos = data - shapeBuffer; + + if (r != 0x06) { + if (r == 0x20) { + if (*ch == 0x200c || *ch == 0x200d) + /* remove ZWJ and ZWNJ */ + goto skip; + } + if (reverse) + *data = HB_GetMirroredChar(*ch); + else + *data = *ch; + } else { + hb_uint8 c = *ch & 0xff; + int pos = i + from; + int shape = properties[i].shape; +/* qDebug("mapping U+%x to shape %d glyph=0x%x", ch->unicode(), shape, getShape(c, shape)); */ + /* take care of lam-alef ligatures (lam right of alef) */ + hb_uint16 map; + switch (c) { + case 0x44: { /* lam */ + const HB_UChar16 pch = nextChar(uc, stringLength, pos); + if ((pch >> 8) == 0x06) { + switch (pch & 0xff) { + case 0x22: + case 0x23: + case 0x25: + case 0x27: +/* qDebug(" lam of lam-alef ligature"); */ + map = arabicUnicodeLamAlefMapping[(pch & 0xff) - 0x22][shape]; + goto next; + default: + break; + } + } + break; + } + case 0x22: /* alef with madda */ + case 0x23: /* alef with hamza above */ + case 0x25: /* alef with hamza below */ + case 0x27: /* alef */ + if (prevChar(uc, pos) == 0x0644) { + /* have a lam alef ligature */ + /*qDebug(" alef of lam-alef ligature"); */ + goto skip; + } + default: + break; + } + map = getShape(c, shape); + next: + *data = map; + } + /* ##### Fixme */ + /*glyphs[gpos].attributes.zeroWidth = zeroWidth; */ + if (HB_GetUnicodeCharCategory(*ch) == HB_Mark_NonSpacing) { + attributes[gpos].mark = TRUE; +/* qDebug("glyph %d (char %d) is mark!", gpos, i); */ + } else { + attributes[gpos].mark = FALSE; + clusterStart = data - shapeBuffer; + } + attributes[gpos].clusterStart = !attributes[gpos].mark; + attributes[gpos].combiningClass = HB_GetUnicodeCharCombiningClass(*ch); + attributes[gpos].justification = properties[i].justification; +/* qDebug("data[%d] = %x (from %x)", gpos, (uint)data->unicode(), ch->unicode());*/ + data++; + skip: + ch++; + logClusters[i] = clusterStart; + } + *shapedLength = data - shapeBuffer; + + HB_FREE_STACKARRAY(props); +} + +#ifndef NO_OPENTYPE + +static const HB_OpenTypeFeature arabic_features[] = { + { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty }, + { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty }, + { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty }, + { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, + { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty }, + { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty }, + { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty }, + { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty }, + { HB_MAKE_TAG('c', 's', 'w', 'h'), CswhProperty }, + /* mset is used in old Win95 fonts that don't have a 'mark' positioning table. */ + { HB_MAKE_TAG('m', 's', 'e', 't'), MsetProperty }, + {0, 0} +}; + +static const HB_OpenTypeFeature syriac_features[] = { + { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty }, + { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty }, + { HB_MAKE_TAG('f', 'i', 'n', '2'), FinaProperty }, + { HB_MAKE_TAG('f', 'i', 'n', '3'), FinaProperty }, + { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty }, + { HB_MAKE_TAG('m', 'e', 'd', '2'), MediProperty }, + { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, + { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty }, + { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty }, + { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty }, + { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty }, + {0, 0} +}; + +static HB_Bool arabicSyriacOpenTypeShape(HB_ShaperItem *item, HB_Bool *ot_ok) +{ + const HB_UChar16 *uc; + const int nglyphs = item->num_glyphs; + hb_int32 f; + hb_uint32 l; + HB_ArabicProperties *properties; + HB_DECLARE_STACKARRAY(HB_ArabicProperties, props) + HB_DECLARE_STACKARRAY(hb_uint32, apply) + HB_Bool shaped; + int i = 0; + + *ot_ok = TRUE; + + if (!HB_ConvertStringToGlyphIndices(item)) + return FALSE; + HB_HeuristicSetGlyphAttributes(item); + + HB_INIT_STACKARRAY(HB_ArabicProperties, props, item->item.length + 2); + HB_INIT_STACKARRAY(hb_uint32, apply, item->num_glyphs); + + uc = item->string + item->item.pos; + + properties = props; + f = 0; + l = item->item.length; + if (item->item.pos > 0) { + --f; + ++l; + ++properties; + } + if (f + l < item->stringLength) { + ++l; + } + getArabicProperties(uc+f, l, props); + + for (i = 0; i < (int)item->num_glyphs; i++) { + apply[i] = 0; + + if (properties[i].shape == XIsolated) + apply[i] |= MediProperty|FinaProperty|InitProperty; + else if (properties[i].shape == XMedial) + apply[i] |= IsolProperty|FinaProperty|InitProperty; + else if (properties[i].shape == XFinal) + apply[i] |= IsolProperty|MediProperty|InitProperty; + else if (properties[i].shape == XInitial) + apply[i] |= IsolProperty|MediProperty|FinaProperty; + + item->attributes[i].justification = properties[i].justification; + } + + HB_FREE_STACKARRAY(props); + + shaped = HB_OpenTypeShape(item, apply); + + HB_FREE_STACKARRAY(apply); + + if (!shaped) { + *ot_ok = FALSE; + return FALSE; + } + return HB_OpenTypePosition(item, nglyphs, /*doLogClusters*/TRUE); +} + +#endif + +/* #### stil missing: identify invalid character combinations */ +HB_Bool HB_ArabicShape(HB_ShaperItem *item) +{ + int slen; + HB_Bool haveGlyphs; + HB_STACKARRAY(HB_UChar16, shapedChars, item->item.length); + + assert(item->item.script == HB_Script_Arabic || item->item.script == HB_Script_Syriac); + +#ifndef NO_OPENTYPE + + if (HB_SelectScript(item, item->item.script == HB_Script_Arabic ? arabic_features : syriac_features)) { + HB_Bool ot_ok; + if (arabicSyriacOpenTypeShape(item, &ot_ok)) + return TRUE; + if (ot_ok) + return FALSE; + /* fall through to the non OT code*/ + } +#endif + + if (item->item.script == HB_Script_Syriac) + return HB_BasicShape(item); + + shapedString(item->string, item->stringLength, item->item.pos, item->item.length, shapedChars, &slen, + item->item.bidiLevel % 2, + item->attributes, item->log_clusters); + + haveGlyphs = item->font->klass + ->convertStringToGlyphIndices(item->font, + shapedChars, slen, + item->glyphs, &item->num_glyphs, + item->item.bidiLevel % 2); + + HB_FREE_STACKARRAY(shapedChars); + + if (!haveGlyphs) + return FALSE; + + HB_HeuristicPosition(item); + return TRUE; +} + + |