#include <Arduino.h>
#include <stdint.h>
#include <string.h>
#define GSM7_ESCAPE 0x1B
// Decode one hexadecimal digit into its numeric value.
// Accepts '0'..'9', 'A'..'F' and 'a'..'f'; returns 0..15 on success
// and -1 for any other character.
int hexNibble(char c) {
  int value = -1;  // stays -1 unless c falls into one of the digit ranges
  if (c >= '0' && c <= '9') {
    value = c - '0';
  } else if (c >= 'A' && c <= 'F') {
    value = (c - 'A') + 10;
  } else if (c >= 'a' && c <= 'f') {
    value = (c - 'a') + 10;
  }
  return value;
}
// Convert the two hex chars at p[0] and p[1] into a byte (e.g. "4F" -> 0x4F).
// Returns the byte value (0..255), or -1 if p is null or either character
// is not a hex digit.
// p[1] is only read after p[0] has been validated, so a string that ends at
// p[0] (e.g. "") is never read past its terminator.
int hexByte(const char *p) {
  if (!p) return -1;
  const int hi = hexNibble(p[0]);
  if (hi < 0) return -1;  // also catches '\0': do not touch p[1] in that case
  const int lo = hexNibble(p[1]);
  if (lo < 0) return -1;
  return (hi << 4) | lo;
}
#ifndef GSM7_ESCAPE
#define GSM7_ESCAPE 0x1B  // fallback so this routine also builds on its own
#endif
/*
Map a single UCS2 (Unicode) codepoint to 1 or 2 GSM-7 bytes, using the
GSM 03.38 default alphabet and its basic extension table.
- cp: Unicode codepoint (0x0000..0xFFFF, usually from UCS2)
- out: buffer to write GSM-7 bytes (must not be null)
- outSize: size of 'out' buffer; 2 bytes are always sufficient
Returns: number of bytes written (1 or 2),
0 if not representable (caller can insert '?'), if 'out' is null,
or if 'out' is too small for a 2-byte escape sequence.
Notes:
- GSM-7 is NOT identical to ASCII for every printable character:
  '@', '$' and '_' have different codes, '[', '\', ']', '^', '{', '|', '}'
  and '~' need an ESC (0x1B) prefix, and '`' does not exist at all.
  These are therefore resolved BEFORE the plain ASCII passthrough.
- 0x00 is a valid GSM-7 code ('@'). Callers that store the result in a
  NUL-terminated string must track the length separately.
*/
size_t unicodeToGsm7(uint16_t cp, uint8_t *out, size_t outSize) {
  if (!out || outSize == 0) return 0;

  uint8_t code = 0;
  bool needsEscape = false;

  switch (cp) {
    // --- Default alphabet: codepoints whose GSM-7 code differs from Unicode ---
    case 0x0040: code = 0x00; break; // '@'
    case 0x00A3: code = 0x01; break; // '£'
    case 0x0024: code = 0x02; break; // '$'
    case 0x00A5: code = 0x03; break; // '¥'
    case 0x00E8: code = 0x04; break; // 'è'
    case 0x00E9: code = 0x05; break; // 'é'
    case 0x00F9: code = 0x06; break; // 'ù'
    case 0x00EC: code = 0x07; break; // 'ì'
    case 0x00F2: code = 0x08; break; // 'ò'
    case 0x00C7: code = 0x09; break; // 'Ç'
    case 0x00D8: code = 0x0B; break; // 'Ø'
    case 0x00F8: code = 0x0C; break; // 'ø'
    case 0x00C5: code = 0x0E; break; // 'Å'
    case 0x00E5: code = 0x0F; break; // 'å'
    case 0x0394: code = 0x10; break; // 'Δ'
    case 0x005F: code = 0x11; break; // '_'
    case 0x03A6: code = 0x12; break; // 'Φ'
    case 0x0393: code = 0x13; break; // 'Γ'
    case 0x039B: code = 0x14; break; // 'Λ'
    case 0x03A9: code = 0x15; break; // 'Ω'
    case 0x03A0: code = 0x16; break; // 'Π'
    case 0x03A8: code = 0x17; break; // 'Ψ'
    case 0x03A3: code = 0x18; break; // 'Σ'
    case 0x0398: code = 0x19; break; // 'Θ'
    case 0x039E: code = 0x1A; break; // 'Ξ'
    case 0x00C6: code = 0x1C; break; // 'Æ'
    case 0x00E6: code = 0x1D; break; // 'æ'
    case 0x00DF: code = 0x1E; break; // 'ß'
    case 0x00C9: code = 0x1F; break; // 'É'
    case 0x00A4: code = 0x24; break; // '¤'
    case 0x00A1: code = 0x40; break; // '¡'
    case 0x00C4: code = 0x5B; break; // 'Ä'
    case 0x00D6: code = 0x5C; break; // 'Ö'
    case 0x00D1: code = 0x5D; break; // 'Ñ'
    case 0x00DC: code = 0x5E; break; // 'Ü'
    case 0x00A7: code = 0x5F; break; // '§'
    case 0x00BF: code = 0x60; break; // '¿'
    case 0x00E4: code = 0x7B; break; // 'ä'
    case 0x00F6: code = 0x7C; break; // 'ö'
    case 0x00F1: code = 0x7D; break; // 'ñ'
    case 0x00FC: code = 0x7E; break; // 'ü'
    case 0x00E0: code = 0x7F; break; // 'à'

    // --- Extension table: emitted as ESC (0x1B) followed by the code ---
    case 0x000C: code = 0x0A; needsEscape = true; break; // form feed
    case 0x005E: code = 0x14; needsEscape = true; break; // '^'
    case 0x007B: code = 0x28; needsEscape = true; break; // '{'
    case 0x007D: code = 0x29; needsEscape = true; break; // '}'
    case 0x005C: code = 0x2F; needsEscape = true; break; // '\\' (backslash)
    case 0x005B: code = 0x3C; needsEscape = true; break; // '['
    case 0x007E: code = 0x3D; needsEscape = true; break; // '~'
    case 0x005D: code = 0x3E; needsEscape = true; break; // ']'
    case 0x007C: code = 0x40; needsEscape = true; break; // '|'
    case 0x20AC: code = 0x65; needsEscape = true; break; // '€'

    // --- ASCII character with no GSM-7 equivalent ---
    case 0x0060: return 0; // '`' (0x60 is '¿' in GSM-7)

    default:
      // Everything left in LF, CR and 0x20..0x7E has the same code in
      // GSM-7 as in ASCII (all exceptions were handled above).
      if (cp == 0x000A || cp == 0x000D || (cp >= 0x0020 && cp <= 0x007E)) {
        code = (uint8_t)cp;
        break;
      }
      // Not representable in this table
      return 0;
  }

  if (needsEscape) {
    if (outSize < 2) return 0;  // never emit a lone ESC
    out[0] = GSM7_ESCAPE;
    out[1] = code;
    return 2;
  }
  out[0] = code;
  return 1;
}
/*
Convert a UCS2-encoded hex string (e.g. "00480065006C006C006F")
to a GSM-7 text string (1 char per GSM-7 code, not packed septets).
- ucs2Hex: input hex string, length must be multiple of 4 (each 2 bytes = 1 codepoint).
  Usually from AT+CMGR in UCS2 mode without quotes.
- gsmOut: buffer for resulting GSM-7 characters (0-terminated).
- outSize: size of gsmOut buffer (bytes), including the terminator.
Unsupported chars are replaced by '?'.
Returns: true if the whole input was well-formed; false on null/empty
arguments, on a length that is not a multiple of 4, or on a non-hex digit.
Whenever gsmOut is usable it is left 0-terminated: empty on a length error,
or holding the characters decoded before the first bad hex digit.
Notes:
- If the output buffer is too small the result is truncated (still true).
  A 2-byte escape sequence (ESC + code) is never split: if only one byte
  is left, conversion stops before it.
- The output is 0-terminated, so a GSM-7 code 0x00 produced by
  unicodeToGsm7 cannot be told apart from the terminator.
*/
bool ucs2HexToGsm7(const char *ucs2Hex, char *gsmOut, size_t outSize) {
  if (!ucs2Hex || !gsmOut || outSize == 0) return false;
  gsmOut[0] = '\0';  // keep the output a valid string on every exit path

  const size_t inLen = strlen(ucs2Hex);
  if (inLen % 4 != 0) {
    // Each UCS2 char is 4 hex chars: high byte + low byte
    return false;
  }

  const size_t capacity = outSize - 1;  // leave space for '\0'
  size_t outPos = 0;
  bool ok = true;

  for (size_t i = 0; i < inLen && outPos < capacity; i += 4) {
    const int hiByte = hexByte(&ucs2Hex[i]);
    const int loByte = hexByte(&ucs2Hex[i + 2]);
    if (hiByte < 0 || loByte < 0) {
      // invalid hex: stop and report it instead of silently truncating
      ok = false;
      break;
    }
    const uint16_t cp = (uint16_t)(((uint16_t)hiByte << 8) | (uint16_t)loByte);

    uint8_t tmp[2];
    const size_t written = unicodeToGsm7(cp, tmp, sizeof(tmp));
    if (written == 0) {
      // Not representable -> fallback to '?'
      gsmOut[outPos++] = '?';
      continue;
    }
    if (written > capacity - outPos) {
      // Not enough room for the complete sequence; do not emit a lone ESC.
      break;
    }
    for (size_t j = 0; j < written; j++) {
      gsmOut[outPos++] = (char)tmp[j];
    }
  }

  gsmOut[outPos] = '\0';
  return ok;
}
// --- Demo on Arduino ---
// Demo: converts two UCS2 hex samples with ucs2HexToGsm7 and prints the
// results on the serial port at 9600 baud.
void setup() {
  Serial.begin(9600);
  while (!Serial) { /* wait until the serial port reports ready */ }

  // "Hello World!" in UCS2 (big endian: 0048 0065 006C 006C 006F 0020 0057 006F 0072 006C 0064 0021)
  const char *ucs2Example = "00480065006C006C006F00200057006F0072006C00640021";
  char gsmText[160]; // enough for demo
  if (ucs2HexToGsm7(ucs2Example, gsmText, sizeof(gsmText))) {
    Serial.print(F("GSM-7 text: "));
    Serial.println(gsmText);
  } else {
    Serial.println(F("Conversion failed."));
  }

  // Example with Euro sign: "Euro €" -> 0045 0075 0072 006F 0020 20AC
  // (exactly 4 hex digits per character; a stray "00" would make the
  // length invalid and the conversion fail)
  const char *ucs2Euro = "004500750072006F002020AC";
  if (ucs2HexToGsm7(ucs2Euro, gsmText, sizeof(gsmText))) {
    Serial.print(F("GSM-7 text (Euro example): "));
    // Note: output will contain ESC (0x1B) + 'e' for '€'.
    for (size_t i = 0; gsmText[i] != '\0'; ++i) {
      uint8_t b = (uint8_t)gsmText[i];
      if (b < 0x20 || b == 0x7F) {
        // print control/escape in hex form, zero-padded to two digits
        Serial.print(F("[0x"));
        if (b < 0x10) Serial.print('0');
        Serial.print(b, HEX);
        Serial.print(F("]"));
      } else {
        Serial.print((char)b);
      }
    }
    Serial.println();
  } else {
    Serial.println(F("Conversion failed."));
  }
}
// Intentionally empty: the whole demo runs once in setup().
void loop() {
}