mirror of
https://github.com/HackHerz/pusher
synced 2025-12-06 02:10:19 +00:00
Now using the PushNotifier SDK and submodules
This commit is contained in:
parent
7d8642c9fb
commit
0eca806d46
13 changed files with 34 additions and 14703 deletions
6
.gitmodules
vendored
Normal file
6
.gitmodules
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
[submodule "src/pushnotifier-sdk-cpp"]
|
||||
path = src/pushnotifier-sdk-cpp
|
||||
url = https://github.com/HackHerz/pushnotifier-sdk-cpp
|
||||
[submodule "src/simpleini"]
|
||||
path = src/simpleini
|
||||
url = https://github.com/brofield/simpleini
|
||||
29
makefile
29
makefile
|
|
@ -5,16 +5,18 @@ TARGET = pusher
|
|||
INSTALL_DIR = /usr/local/bin
|
||||
CXX = g++
|
||||
CPPFLAGS = -std=c++11
|
||||
BUILDCOMMAND = $(CXX) $(CPPFLAGS)
|
||||
CDEFS = -DAPI_TOKEN=\"8E7D8B2DDE7DDE7D6C3V52VB52VBD4DDETBTTTKFFB11\"
|
||||
CDEFS += -DAPP_PACKAGE=\"com.hackherz.pusher\"
|
||||
BUILDCOMMAND = $(CXX) $(CDEFS) $(CPPFLAGS)
|
||||
LIBS = `pkg-config libcurl --cflags --libs`
|
||||
|
||||
|
||||
#=============================================================================
|
||||
# Build
|
||||
all: pusher
|
||||
all: $(TARGET)
|
||||
|
||||
$(TARGET): simpleini curlhandler pushhandler main
|
||||
$(BUILDCOMMAND) src/simpleini/ConvertUTF.o src/curlhandler.o src/pushhandler.o src/main.o $(LIBS) -o $(TARGET)
|
||||
$(TARGET): simpleini pushnotifier main
|
||||
$(BUILDCOMMAND) src/simpleini/ConvertUTF.o src/pushnotifier-sdk-cpp/pushnotifier.o src/main.o $(LIBS) -o $(TARGET)
|
||||
|
||||
|
||||
# simpleini
|
||||
|
|
@ -25,20 +27,12 @@ src/simpleini/ConvertUTF.o: src/simpleini/ConvertUTF.c
|
|||
$(BUILDCOMMAND) -c src/simpleini/ConvertUTF.c -o src/simpleini/ConvertUTF.o
|
||||
|
||||
|
||||
# curlhandler
|
||||
.PHONY: curlhandler
|
||||
curlhandler: src/curlhandler.o
|
||||
# pushnotifier
|
||||
.PHONY: pushnotifier
|
||||
pushnotifier: src/pushnotifier-sdk-cpp/pushnotifier.o
|
||||
|
||||
src/curlhandler.o: src/curlhandler.cpp
|
||||
$(BUILDCOMMAND) -c src/curlhandler.cpp -o src/curlhandler.o
|
||||
|
||||
|
||||
# pushhandler
|
||||
.PHONY: pushhandler
|
||||
pushhandler: src/pushhandler.o
|
||||
|
||||
src/pushhandler.o: src/pushhandler.cpp
|
||||
$(BUILDCOMMAND) -c src/pushhandler.cpp -o src/pushhandler.o
|
||||
src/pushnotifier-sdk-cpp/pushnotifier.o: src/pushnotifier-sdk-cpp/PushNotifier.cpp
|
||||
$(BUILDCOMMAND) -c src/pushnotifier-sdk-cpp/PushNotifier.cpp -o src/pushnotifier-sdk-cpp/pushnotifier.o
|
||||
|
||||
|
||||
# main
|
||||
|
|
@ -54,6 +48,7 @@ src/main.o: src/main.cpp
|
|||
clean:
|
||||
rm -f src/*.o
|
||||
rm -f src/simpleini/*.o
|
||||
rm -f src/pushnotifier-sdk-cpp/pushnotifier.o
|
||||
rm -f $(TARGET)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,71 +0,0 @@
|
|||
#include "curlhandler.h"

#include <stdexcept>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
// Replacement table used by CurlHandler::urlDecode(): each row is
// {literal character, percent-escape}. NOTE: despite the "decode" in the
// function name, the substitution runs literal -> %XX, i.e. it encodes.
|
||||
string matches[][2] = {
|
||||
{"$", "%24"},
|
||||
{"&", "%26"},
|
||||
{"+", "%2B"},
|
||||
{",", "%2C"},
|
||||
{"/", "%2F"},
|
||||
{":", "%3A"},
|
||||
{";", "%3B"},
|
||||
{"=", "%3D"},
|
||||
{"?", "%3F"},
|
||||
{"@", "%40"}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Write callback handed to libcurl: appends the delivered chunk to the
// std::string that `userp` points at and reports the whole chunk as consumed.
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
	const size_t chunkBytes = size * nmemb;
	std::string *sink = static_cast<std::string*>(userp);
	sink->append(static_cast<const char*>(contents), chunkBytes);
	return chunkBytes;
}
|
||||
|
||||
|
||||
// Sends `data` as the body of an HTTP POST to `url` and returns the raw
// response body.
//
// Throws std::runtime_error when libcurl cannot be initialised or when the
// transfer fails. Previously both cases were swallowed and the caller got an
// empty or partial body with no indication that anything went wrong.
string CurlHandler::request(string data, const char* url)
{
	CURL *curl = curl_easy_init();

	if(!curl)
	{
		throw std::runtime_error("Network Error: could not initialise libcurl");
	}

	string readBuffer;

	curl_easy_setopt(curl, CURLOPT_URL, url);
	curl_easy_setopt(curl, CURLOPT_USERAGENT, USER_AGENT);
	curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data.c_str());
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);

	const CURLcode res = curl_easy_perform(curl);

	// release the handle before reporting any failure so it is never leaked
	curl_easy_cleanup(curl);

	if(res != CURLE_OK)
	{
		throw std::runtime_error(std::string("Network Error: ") + curl_easy_strerror(res));
	}

	return readBuffer;
}
|
||||
|
||||
|
||||
// Percent-encodes `url` for use as a form value and returns the result.
// NOTE: despite the name this function ENCODES; the name is kept because
// callers depend on it.
//
// Every character listed in `matches` is replaced by its %XX escape. A literal
// '%' is escaped as well (to "%25"); without that, input such as "50%2B" would
// reach the server looking like an already-encoded "50+".
string CurlHandler::urlDecode(string url)
{
	// Escape '%' before anything else: every replacement below introduces new
	// '%' characters which must not be escaped a second time.
	size_t percentPos = 0;
	while((percentPos = url.find('%', percentPos)) != string::npos)
	{
		url.replace(percentPos, 1, "%25");
		percentPos += 3; // skip past the "%25" just written
	}

	for(unsigned int i = 0; i < (sizeof(matches)/sizeof(matches[0])); i++)
	{
		size_t start_pos = 0;
		while((start_pos = url.find(matches[i][0], start_pos)) != string::npos)
		{
			url.replace(start_pos, matches[i][0].length(), matches[i][1]);
			// continue after the inserted escape so it is not scanned again
			start_pos += matches[i][1].length();
		}
	}

	return url;
}
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
#ifndef H_CURLHANDLER
|
||||
#define H_CURLHANDLER
|
||||
|
||||
#include <curl/curl.h>
|
||||
#include <string>
|
||||
|
||||
#define USER_AGENT "pusher/0.2"
|
||||
|
||||
|
||||
// Stateless collection of HTTP helpers; both members are static, so the class
// is never instantiated by its users.
class CurlHandler
{
	public:
		// Returns `url` with reserved characters replaced by %XX escapes
		// (the name says "decode", the implementation encodes).
		static std::string urlDecode(std::string url);

		// POSTs `data` to `url` and returns the response body.
		static std::string request(std::string data, const char* url);
};
|
||||
|
||||
#endif
|
||||
10183
src/json/json.hpp
10183
src/json/json.hpp
File diff suppressed because it is too large
Load diff
54
src/main.cpp
54
src/main.cpp
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
#include "tclap/CmdLine.h"
|
||||
#include "simpleini/SimpleIni.h"
|
||||
#include "pushhandler.h"
|
||||
#include "pushnotifier-sdk-cpp/PushNotifier.h"
|
||||
|
||||
#define CONFIG_FILE "/etc/pusher.conf"
|
||||
|
||||
|
|
@ -56,14 +56,13 @@ int main(int argc, char **argv)
|
|||
TCLAP::CmdLine cmd("Push notifications to your phone easily.", ' ', "0.3");
|
||||
|
||||
// Values
|
||||
TCLAP::ValueArg<int> idArg("i","id","ID of the device.",false,0,"number");
|
||||
TCLAP::ValueArg<string> idArg("i","id","ID of the device.",false,"0","string");
|
||||
cmd.add(idArg);
|
||||
|
||||
// Switches
|
||||
TCLAP::SwitchArg tokenSwitch("t", "token", "Request your token.", cmd, false);
|
||||
TCLAP::SwitchArg listSwitch("l", "list", "List all your devices.", cmd, false);
|
||||
TCLAP::SwitchArg pipeSwitch("p", "pipe", "Input via pipe.", cmd, false);
|
||||
TCLAP::SwitchArg verifySwitch("v","verify","Checks if token is still valid.", cmd, false);
|
||||
|
||||
|
||||
// add unlabeled argument
|
||||
|
|
@ -76,8 +75,7 @@ int main(int argc, char **argv)
|
|||
|
||||
|
||||
// Variables
|
||||
string message;
|
||||
int id;
|
||||
string message, id;
|
||||
CSimpleIniA iniReader;
|
||||
iniReader.SetUnicode();
|
||||
|
||||
|
|
@ -85,7 +83,8 @@ int main(int argc, char **argv)
|
|||
// Request token
|
||||
if(tokenSwitch.getValue())
|
||||
{
|
||||
string username, password, token;
|
||||
string username, password;
|
||||
PushNotifier::AppToken token;
|
||||
|
||||
// Read username
|
||||
cout << "Username: ";
|
||||
|
|
@ -103,13 +102,13 @@ int main(int argc, char **argv)
|
|||
cout << endl;
|
||||
|
||||
// pusher instance
|
||||
PushHandler buf(username);
|
||||
token = buf.login(password);
|
||||
PushNotifier buf;
|
||||
token = buf.login(username, password);
|
||||
|
||||
|
||||
// Build config
|
||||
iniReader.SetValue("pusher", "username", username.c_str());
|
||||
iniReader.SetValue("pusher", "appToken", token.c_str());
|
||||
iniReader.SetValue("pusher", "appToken", token.token.c_str());
|
||||
|
||||
// Check if file is writable
|
||||
if(iniReader.SaveFile(CONFIG_FILE) < 0)
|
||||
|
|
@ -135,7 +134,7 @@ int main(int argc, char **argv)
|
|||
// Check if reading of config is possible
|
||||
if(iniReader.LoadFile(CONFIG_FILE) < 0)
|
||||
{
|
||||
throw PusherError("You need to login first.");
|
||||
throw runtime_error("You need to login first.");
|
||||
}
|
||||
|
||||
string username = iniReader.GetValue("pusher", "username", "");
|
||||
|
|
@ -143,37 +142,20 @@ int main(int argc, char **argv)
|
|||
|
||||
if(username.empty() || appToken.empty())
|
||||
{
|
||||
throw PusherError("You need to login first.");
|
||||
throw runtime_error("You need to login first.");
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Loading values
|
||||
PushHandler pusherInstance(username, appToken);
|
||||
|
||||
|
||||
|
||||
// Verify token
|
||||
if(verifySwitch.getValue())
|
||||
{
|
||||
if(pusherInstance.verifyToken())
|
||||
{
|
||||
cout << "appToken is valid" << endl;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "appToken is invalid" << endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
PushNotifier pusherInstance(username, appToken, 0);
|
||||
|
||||
|
||||
|
||||
// List devices
|
||||
if(listSwitch.getValue())
|
||||
{
|
||||
vector<PushHandler::Device> devices;
|
||||
vector<PushNotifier::Device> devices;
|
||||
devices = pusherInstance.getDevices();
|
||||
|
||||
unsigned int titleLength = 5;
|
||||
|
|
@ -209,7 +191,7 @@ int main(int argc, char **argv)
|
|||
|
||||
|
||||
// Device id
|
||||
if(idArg.getValue() != 0)
|
||||
// Only take the device id from the command line when -i/--id was actually
// given. sizeof(idArg) is a compile-time constant and never zero, so the old
// test was always true and the default "0" replaced any previously chosen id.
if(idArg.isSet())
|
||||
{
|
||||
id = idArg.getValue();
|
||||
}
|
||||
|
|
@ -242,7 +224,7 @@ int main(int argc, char **argv)
|
|||
stringstream stringID;
|
||||
stringID << id;
|
||||
|
||||
pusherInstance.sendToDevice(stringID.str(), message);
|
||||
pusherInstance.sendMessage(stringID.str(), message);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -257,14 +239,6 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
|
||||
// errors thrown by pushhandler
|
||||
catch(PusherError& e)
|
||||
{
|
||||
cout << "Error: " << e.what() << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
// other errors
|
||||
catch(exception& e)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,188 +0,0 @@
|
|||
#include "pushhandler.h"
|
||||
#include "json/json.hpp"
|
||||
#include "curlhandler.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using json = nlohmann::json;
|
||||
|
||||
|
||||
// constructor only username
|
||||
PushHandler::PushHandler(string username)
|
||||
{
|
||||
this->username = username;
|
||||
}
|
||||
|
||||
|
||||
// constructor with appToken
|
||||
PushHandler::PushHandler(string username, string appToken)
|
||||
{
|
||||
this->username = username;
|
||||
this->appToken = appToken;
|
||||
}
|
||||
|
||||
|
||||
// login
|
||||
string PushHandler::login(string password)
|
||||
{
|
||||
// build request data
|
||||
stringstream requestData;
|
||||
requestData << "apiToken=" << API_TOKEN;
|
||||
requestData << "&username=" << this->username;
|
||||
requestData << "&password=" << password;
|
||||
|
||||
// network request
|
||||
string readBuffer;
|
||||
readBuffer = CurlHandler::request(requestData.str(), URL_PN_LOGIN);
|
||||
|
||||
// json parsing
|
||||
stringstream jsonData;
|
||||
jsonData << readBuffer;
|
||||
|
||||
json j;
|
||||
j << jsonData;
|
||||
|
||||
if(j["status"].get<string>() != "ok")
|
||||
{
|
||||
throw PusherError("wrong credentials");
|
||||
}
|
||||
|
||||
this->appToken = j["appToken"].get<string>();
|
||||
return this->appToken;
|
||||
}
|
||||
|
||||
|
||||
// get list of devices
|
||||
vector<PushHandler::Device> PushHandler::getDevices()
|
||||
{
|
||||
// build request data
|
||||
stringstream requestData;
|
||||
requestData << "apiToken=" << API_TOKEN;
|
||||
requestData << "&appToken=" << this->appToken;
|
||||
|
||||
// network request
|
||||
string readBuffer;
|
||||
readBuffer =CurlHandler::request(requestData.str(), URL_PN_GET_DEVICES);
|
||||
|
||||
// json parsing
|
||||
stringstream jsonData;
|
||||
jsonData << readBuffer;
|
||||
|
||||
json j;
|
||||
j << jsonData;
|
||||
|
||||
// handle the codes
|
||||
switch(j["code"].get<int>())
|
||||
{
|
||||
case 1: throw PusherError("Invalid API Token");
|
||||
break;
|
||||
case 2: throw PusherError("App Token missing");
|
||||
break;
|
||||
case 3: throw PusherError("App Token invalid");
|
||||
}
|
||||
|
||||
vector<Device> buffer;
|
||||
|
||||
for(auto element : j["devices"])
|
||||
{
|
||||
Device buf;
|
||||
|
||||
buf.title = element["title"].get<string>();
|
||||
buf.id = to_string(element["id"].get<int>());
|
||||
buf.model = element["model"].get<string>();
|
||||
|
||||
buffer.push_back(buf);
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
||||
// verify token
|
||||
bool PushHandler::verifyToken()
|
||||
{
|
||||
// build request data
|
||||
stringstream requestData;
|
||||
requestData << "apiToken=" << API_TOKEN;
|
||||
requestData << "&username=" << this->username;
|
||||
requestData << "&appToken=" << this->appToken;
|
||||
|
||||
// network request
|
||||
string readBuffer;
|
||||
readBuffer = CurlHandler::request(requestData.str(), URL_PN_CHECK_TOKEN);
|
||||
|
||||
// json parser
|
||||
stringstream jsonData;
|
||||
jsonData << readBuffer;
|
||||
|
||||
json j;
|
||||
j << jsonData;
|
||||
|
||||
switch(j["code"].get<int>())
|
||||
{
|
||||
case 0: return true;
|
||||
break;
|
||||
case 1: throw PusherError("Invalid API-Token");
|
||||
break;
|
||||
case 2: return false;
|
||||
default: throw PusherError("Invalid server response");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// send to device
|
||||
void PushHandler::sendToDevice(string id, string message)
|
||||
{
|
||||
// analyze content-type
|
||||
// ......
|
||||
|
||||
|
||||
// actual sending
|
||||
// build request data
|
||||
stringstream requestData;
|
||||
requestData << "apiToken=" << API_TOKEN;
|
||||
requestData << "&appToken=" << this->appToken;
|
||||
requestData << "&app=" << APP_PACKAGE;
|
||||
requestData << "&deviceID=" << id;
|
||||
requestData << "&type=" << "MESSAGE";
|
||||
requestData << "&content=" << CurlHandler::urlDecode(message);
|
||||
|
||||
// network request
|
||||
string readBuffer;
|
||||
readBuffer = CurlHandler::request(requestData.str(), URL_PN_SEND_TO_DEVICE);
|
||||
|
||||
// json parsing
|
||||
stringstream jsonData;
|
||||
jsonData << readBuffer;
|
||||
|
||||
json j;
|
||||
j << jsonData;
|
||||
|
||||
switch(j["code"].get<int>())
|
||||
{
|
||||
case 0: //return 0;
|
||||
break;
|
||||
case 1: throw PusherError("Invalid API Token");
|
||||
break;
|
||||
case 2: throw PusherError("App Token missing");
|
||||
break;
|
||||
case 3: throw PusherError("App Token invalid");
|
||||
break;
|
||||
case 4: throw PusherError("Package Name missing");
|
||||
break;
|
||||
case 5: throw PusherError("Package Name invalid");
|
||||
break;
|
||||
case 6: throw PusherError("Package Name is not linked with the provided API Token");
|
||||
break;
|
||||
case 7: throw PusherError("Device ID missing");
|
||||
break;
|
||||
case 8: throw PusherError("Device ID invalid");
|
||||
break;
|
||||
case 9: throw PusherError("Type missing or invalid");
|
||||
break;
|
||||
default: throw PusherError("Invalid server response");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
#ifndef H_PUSHHANDLER
|
||||
#define H_PUSHHANDLER
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Change this line if you are using your own API-Token
// The token is written to streams as a C string (operator<< on const char*),
// so the array has to end with a NUL terminator; without it the read runs
// past the end of the array.
const char API_TOKEN[] = {
	0x38, 0x45, 0x37, 0x44, 0x38, 0x42, 0x32,
	0x44, 0x44, 0x45, 0x37, 0x44, 0x44, 0x45,
	0x37, 0x44, 0x36, 0x43, 0x33, 0x56, 0x35,
	0x32, 0x56, 0x42, 0x35, 0x32, 0x56, 0x42,
	0x44, 0x34, 0x44, 0x44, 0x45, 0x54, 0x42,
	0x54, 0x54, 0x54, 0x4b, 0x46, 0x46, 0x42,
	0x00 };
|
||||
|
||||
#define APP_PACKAGE "com.hackherz.pusher"
|
||||
|
||||
#define URL_PN_LOGIN "http://a.pushnotifier.de/1/login/"
|
||||
#define URL_PN_CHECK_TOKEN "http://a.pushnotifier.de/1/checkToken/"
|
||||
#define URL_PN_GET_DEVICES "http://a.pushnotifier.de/1/getDevices/"
|
||||
#define URL_PN_SEND_TO_DEVICE "http://a.pushnotifier.de/1/sendToDevice/"
|
||||
|
||||
|
||||
|
||||
// Client for the PushNotifier HTTP API bound to one account.
class PushHandler
{
	public:
		// One registered device as reported by getDevices().
		struct Device
		{
			std::string title;
			std::string id;
			std::string model;
		};

		// Account name only; the app token is obtained later via login().
		PushHandler(std::string username);
		// Account name plus an already known app token.
		PushHandler(std::string username, std::string appToken);

		// Requests an app token with `password`, stores and returns it.
		std::string login(std::string password);
		// Reports whether the stored app token is still accepted.
		bool verifyToken();
		// Lists the devices registered for the account.
		std::vector<Device> getDevices();
		// Sends `message` to the device identified by `deviceID`.
		void sendToDevice(std::string deviceID, std::string message);

	private:
		std::string username;
		std::string appToken;
};
|
||||
|
||||
|
||||
// class for exceptions
// Carries a human readable description of a failed PushNotifier request.
class PusherError
{
	public:
		// Stores `content` as the message returned by what().
		PusherError(std::string content) : content(content) {}

		// Returns the stored message. Declared const so it can also be
		// called on a const object or through `catch(const PusherError&)`.
		std::string what() const { return this->content; }

	private:
		std::string content;
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
1
src/pushnotifier-sdk-cpp
Submodule
1
src/pushnotifier-sdk-cpp
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 15032038ebc9df984f650181c176b1694aa3f33e
|
||||
1
src/simpleini
Submodule
1
src/simpleini
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 2af65fcc504f8242752755e836709762ef7ce062
|
||||
|
|
@ -1,539 +0,0 @@
|
|||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
|
||||
Author: Mark E. Davis, 1994.
|
||||
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||
Sept 2001: fixed const & error conditions per
|
||||
mods suggested by S. Parent & A. Lillich.
|
||||
June 2002: Tim Dodd added detection and handling of incomplete
|
||||
source sequences, enhanced error detection, added casts
|
||||
to eliminate compiler warnings.
|
||||
July 2003: slight mods to back out aggressive FFFE detection.
|
||||
Jan 2004: updated switches in from-UTF8 conversions.
|
||||
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
|
||||
|
||||
See the header file "ConvertUTF.h" for complete documentation.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
||||
#include "ConvertUTF.h"
|
||||
#ifdef CVTUTF_DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
static const int halfShift = 10; /* used for shifting by 10 bits */
|
||||
|
||||
static const UTF32 halfBase = 0x0010000UL;
|
||||
static const UTF32 halfMask = 0x3FFUL;
|
||||
|
||||
#define UNI_SUR_HIGH_START (UTF32)0xD800
|
||||
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
|
||||
#define UNI_SUR_LOW_START (UTF32)0xDC00
|
||||
#define UNI_SUR_LOW_END (UTF32)0xDFFF
|
||||
#define false 0
|
||||
#define true 1
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts the UTF-32 code units in [*sourceStart, sourceEnd) to UTF-16 and
 * writes them to [*targetStart, targetEnd).
 * On return *sourceStart and *targetStart have been advanced to the positions
 * where conversion stopped.
 * Result: conversionOK, targetExhausted (output buffer full) or sourceIllegal.
 * With flags != strictConversion illegal input is replaced by
 * UNI_REPLACEMENT_CHAR instead of being reported.
 */
ConversionResult ConvertUTF32toUTF16 (
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF32* source = *sourceStart;
|
||||
UTF16* target = *targetStart;
|
||||
while (source < sourceEnd) {
|
||||
UTF32 ch;
|
||||
if (target >= targetEnd) {
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
ch = *source++;
|
||||
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
|
||||
/* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
if (flags == strictConversion) {
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
} else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
*target++ = (UTF16)ch; /* normal case */
|
||||
}
|
||||
} else if (ch > UNI_MAX_LEGAL_UTF32) {
|
||||
/* NOTE: unlike the surrogate case above there is no break here, so the loop keeps converting after recording the error */
if (flags == strictConversion) {
|
||||
result = sourceIllegal;
|
||||
} else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
||||
/* a surrogate pair needs two free UTF-16 slots */
if (target + 1 >= targetEnd) {
|
||||
--source; /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
ch -= halfBase;
|
||||
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
|
||||
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
|
||||
}
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32 and
 * writes them to [*targetStart, targetEnd).
 * On return *sourceStart and *targetStart have been advanced to the positions
 * where conversion stopped.
 * Result: conversionOK, sourceExhausted (input ends after a high surrogate),
 * targetExhausted (output buffer full) or, with strictConversion only,
 * sourceIllegal for an unpaired surrogate.
 */
ConversionResult ConvertUTF16toUTF32 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF16* source = *sourceStart;
|
||||
UTF32* target = *targetStart;
|
||||
UTF32 ch, ch2;
|
||||
while (source < sourceEnd) {
|
||||
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
|
||||
ch = *source++;
|
||||
/* If we have a surrogate pair, convert to UTF32 first. */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
|
||||
/* If the 16 bits following the high surrogate are in the source buffer... */
|
||||
if (source < sourceEnd) {
|
||||
ch2 = *source;
|
||||
/* If it's a low surrogate, convert to UTF32. */
|
||||
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
|
||||
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
|
||||
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
|
||||
++source;
|
||||
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
} else { /* We don't have the 16 bits following the high surrogate. */
|
||||
--source; /* return to the high surrogate */
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
}
|
||||
} else if (flags == strictConversion) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* in lenient mode an unpaired surrogate reaches this point unchanged and is copied as-is */
if (target >= targetEnd) {
|
||||
source = oldSource; /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
*target++ = ch;
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
#ifdef CVTUTF_DEBUG
|
||||
if (result == sourceIllegal) {
|
||||
fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Index into the table below with the first byte of a UTF-8 sequence to
|
||||
* get the number of trailing bytes that are supposed to follow it.
|
||||
* Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
|
||||
* left as-is for anyone who may want to do such conversion, which was
|
||||
* allowed in earlier algorithms.
|
||||
*/
|
||||
static const char trailingBytesForUTF8[256] = {
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
||||
};
|
||||
|
||||
/*
|
||||
* Magic values subtracted from a buffer value during UTF8 conversion.
|
||||
* This table contains as many values as there might be trailing bytes
|
||||
* in a UTF-8 sequence.
|
||||
*/
|
||||
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
||||
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
||||
|
||||
/*
|
||||
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
|
||||
* into the first byte, depending on how many bytes follow. There are
|
||||
* as many entries in this table as there are UTF-8 sequence types.
|
||||
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
|
||||
* for *legal* UTF-8 will be 4 or fewer bytes total.
|
||||
*/
|
||||
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* The interface converts a whole buffer to avoid function-call overhead.
|
||||
* Constants have been gathered. Loops & conditionals have been removed as
|
||||
* much as possible for efficiency, in favor of drop-through switches.
|
||||
* (See "Note A" at the bottom of the file for equivalent code.)
|
||||
* If your compiler supports it, the "isLegalUTF8" call can be turned
|
||||
* into an inline function.
|
||||
*/
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8 and
 * writes the bytes to [*targetStart, targetEnd).
 * On return *sourceStart and *targetStart have been advanced to the positions
 * where conversion stopped.
 * Result: conversionOK, sourceExhausted (input ends after a high surrogate),
 * targetExhausted (not enough room for the next sequence) or, with
 * strictConversion only, sourceIllegal for an unpaired surrogate.
 */
ConversionResult ConvertUTF16toUTF8 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
const UTF16* source = *sourceStart;
|
||||
UTF8* target = *targetStart;
|
||||
while (source < sourceEnd) {
|
||||
UTF32 ch;
|
||||
unsigned short bytesToWrite = 0;
|
||||
const UTF32 byteMask = 0xBF;
|
||||
const UTF32 byteMark = 0x80;
|
||||
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
|
||||
ch = *source++;
|
||||
/* If we have a surrogate pair, convert to UTF32 first. */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
|
||||
/* If the 16 bits following the high surrogate are in the source buffer... */
|
||||
if (source < sourceEnd) {
|
||||
UTF32 ch2 = *source;
|
||||
/* If it's a low surrogate, convert to UTF32. */
|
||||
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
|
||||
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
|
||||
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
|
||||
++source;
|
||||
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
} else { /* We don't have the 16 bits following the high surrogate. */
|
||||
--source; /* return to the high surrogate */
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
}
|
||||
} else if (flags == strictConversion) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Figure out how many bytes the result will require */
|
||||
if (ch < (UTF32)0x80) { bytesToWrite = 1;
|
||||
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
|
||||
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
|
||||
} else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
|
||||
} else { bytesToWrite = 3;
|
||||
ch = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
|
||||
/* move to the end of the sequence first: the switch below fills the bytes in from last to first */
target += bytesToWrite;
|
||||
if (target > targetEnd) {
|
||||
source = oldSource; /* Back up source pointer! */
|
||||
target -= bytesToWrite; result = targetExhausted; break;
|
||||
}
|
||||
switch (bytesToWrite) { /* note: everything falls through. */
|
||||
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
|
||||
}
|
||||
/* the switch left target at the first byte of the sequence; step past it again */
target += bytesToWrite;
|
||||
}
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
|
||||
* This must be called with the length pre-determined by the first byte.
|
||||
* If not calling this from ConvertUTF8to*, then the length can be set by:
|
||||
* length = trailingBytesForUTF8[*source]+1;
|
||||
* and the sequence is illegal right away if there aren't that many bytes
|
||||
* available.
|
||||
* If presented with a length > 4, this returns false. The Unicode
|
||||
* definition of UTF-8 goes up to 4-byte sequences.
|
||||
*/
|
||||
|
||||
/* Returns true when the `length` bytes starting at `source` form a legal UTF-8 sequence, false otherwise (including any length outside 1..4). */
static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
UTF8 a;
|
||||
/* srcptr starts one past the last byte; each case below steps back and checks one trailing byte */
const UTF8 *srcptr = source+length;
|
||||
switch (length) {
|
||||
default: return false;
|
||||
/* Everything else falls through when "true"... */
|
||||
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||
case 2: if ((a = (*--srcptr)) > 0xBF) return false;
|
||||
|
||||
/* here `a` is the second byte (source[1]); its lower bound depends on the lead byte and is checked below */
switch (*source) {
|
||||
/* no fall-through in this inner switch */
|
||||
case 0xE0: if (a < 0xA0) return false; break;
|
||||
case 0xED: if (a > 0x9F) return false; break;
|
||||
case 0xF0: if (a < 0x90) return false; break;
|
||||
case 0xF4: if (a > 0x8F) return false; break;
|
||||
default: if (a < 0x80) return false;
|
||||
}
|
||||
|
||||
/* lead byte check: 0x80..0xC1 can never start a sequence */
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
|
||||
}
|
||||
if (*source > 0xF4) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* Exported function to return whether a UTF-8 sequence is legal or not.
|
||||
* This is not used here; it's just exported.
|
||||
*/
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
|
||||
int length = trailingBytesForUTF8[*source]+1;
|
||||
if (source+length > sourceEnd) {
|
||||
return false;
|
||||
}
|
||||
return isLegalUTF8(source, length);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Convert the UTF-8 text in [*sourceStart, sourceEnd) to UTF-16, writing
 * into [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are set to the positions the
 * conversion reached in the two buffers.
 *
 * Returns:
 *   conversionOK    - the whole source range was consumed.
 *   sourceExhausted - the source ends in the middle of a character.
 *   targetExhausted - the next character does not fit in the target;
 *                     the source pointer is left at that character.
 *   sourceIllegal   - malformed UTF-8, or, when flags is
 *                     strictConversion, a decoded value in the surrogate
 *                     range or above UNI_MAX_UTF16; the source pointer
 *                     is left at the start of the offending sequence.
 *
 * When flags is not strictConversion, surrogate-range and over-range
 * values are written as UNI_REPLACEMENT_CHAR and conversion continues.
 */
ConversionResult ConvertUTF8toUTF16 (
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
    UTF16* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        /*
         * Compare against the number of bytes left instead of computing
         * source + extraBytesToRead, which could point past the buffer.
         */
        if (extraBytesToRead >= sourceEnd - source) {
            result = sourceExhausted; break;
        }
        /* Do this check whether lenient or strict */
        if (! isLegalUTF8(source, extraBytesToRead+1)) {
            result = sourceIllegal;
            break;
        }
        /*
         * The cases all fall through. See "Note A" below.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                /* fall through */
            case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
                /* fall through */
            case 3: ch += *source++; ch <<= 6;
                /* fall through */
            case 2: ch += *source++; ch <<= 6;
                /* fall through */
            case 1: ch += *source++; ch <<= 6;
                /* fall through */
            case 0: ch += *source++;
        }
        ch -= offsetsFromUTF8[extraBytesToRead];

        if (target >= targetEnd) {
            source -= (extraBytesToRead+1); /* Back up source pointer! */
            result = targetExhausted; break;
        }
        if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = (UTF16)ch; /* normal case */
            }
        } else if (ch > UNI_MAX_UTF16) {
            if (flags == strictConversion) {
                result = sourceIllegal;
                source -= (extraBytesToRead+1); /* return to the start */
                break; /* Bail out; shouldn't continue */
            } else {
                *target++ = UNI_REPLACEMENT_CHAR;
            }
        } else {
            /* target is a character in range 0xFFFF - 0x10FFFF. */
            /* A surrogate pair needs two free UTF-16 units. */
            if (target + 1 >= targetEnd) {
                source -= (extraBytesToRead+1); /* Back up source pointer! */
                result = targetExhausted; break;
            }
            ch -= halfBase;
            *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
            *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Convert the UTF-32 values in [*sourceStart, sourceEnd) to UTF-8,
 * writing into [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are set to the positions the
 * conversion reached in the two buffers.
 *
 * Returns:
 *   conversionOK    - the whole source range was consumed.
 *   targetExhausted - the next character does not fit in the target;
 *                     the source pointer is left at that character.
 *   sourceIllegal   - when flags is strictConversion, a surrogate value
 *                     was met (the source pointer is left on it and
 *                     conversion stops); or a value above
 *                     UNI_MAX_LEGAL_UTF32 was met, in which case
 *                     UNI_REPLACEMENT_CHAR is written and conversion
 *                     continues.
 *
 * When flags is not strictConversion, surrogate values are encoded like
 * any other value below 0x10000.
 */
ConversionResult ConvertUTF32toUTF8 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF32* source = *sourceStart;
    UTF8* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
        const UTF32 byteMark = 0x80;
        ch = *source++;
        if (flags == strictConversion ) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }
        /*
         * Figure out how many bytes the result will require. Turn any
         * illegally large UTF32 things (> Plane 17) into replacement chars.
         */
        if (ch < (UTF32)0x80) {                 bytesToWrite = 1;
        } else if (ch < (UTF32)0x800) {         bytesToWrite = 2;
        } else if (ch < (UTF32)0x10000) {       bytesToWrite = 3;
        } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
        } else {                                bytesToWrite = 3;
                                                ch = UNI_REPLACEMENT_CHAR;
                                                result = sourceIllegal;
        }

        /*
         * Check the remaining room before moving target, so the pointer
         * never travels past targetEnd.
         */
        if (bytesToWrite > targetEnd - target) {
            --source; /* Back up source pointer! */
            result = targetExhausted; break;
        }
        /* Bytes are emitted last-to-first, so start from the far end. */
        target += bytesToWrite;
        switch (bytesToWrite) { /* note: everything falls through. */
            case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                /* fall through */
            case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                /* fall through */
            case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
                /* fall through */
            case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
        }
        target += bytesToWrite;
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Convert the UTF-8 text in [*sourceStart, sourceEnd) to UTF-32, writing
 * into [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart are set to the positions the
 * conversion reached in the two buffers.
 *
 * Returns:
 *   conversionOK    - the whole source range was consumed.
 *   sourceExhausted - the source ends in the middle of a character.
 *   targetExhausted - the target is full; the source pointer is left at
 *                     the character that did not fit.
 *   sourceIllegal   - malformed UTF-8, or a surrogate value when flags
 *                     is strictConversion (in both cases the source
 *                     pointer is left at the start of the offending
 *                     sequence and conversion stops); or a decoded value
 *                     above UNI_MAX_LEGAL_UTF32, in which case
 *                     UNI_REPLACEMENT_CHAR is written and conversion
 *                     continues.
 *
 * When flags is not strictConversion, surrogate values are written as
 * UNI_REPLACEMENT_CHAR.
 */
ConversionResult ConvertUTF8toUTF32 (
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
    UTF32* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        /*
         * Compare against the number of bytes left instead of computing
         * source + extraBytesToRead, which could point past the buffer.
         */
        if (extraBytesToRead >= sourceEnd - source) {
            result = sourceExhausted; break;
        }
        /* Do this check whether lenient or strict */
        if (! isLegalUTF8(source, extraBytesToRead+1)) {
            result = sourceIllegal;
            break;
        }
        /*
         * The cases all fall through. See "Note A" below.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6;
                /* fall through */
            case 4: ch += *source++; ch <<= 6;
                /* fall through */
            case 3: ch += *source++; ch <<= 6;
                /* fall through */
            case 2: ch += *source++; ch <<= 6;
                /* fall through */
            case 1: ch += *source++; ch <<= 6;
                /* fall through */
            case 0: ch += *source++;
        }
        ch -= offsetsFromUTF8[extraBytesToRead];

        if (target >= targetEnd) {
            source -= (extraBytesToRead+1); /* Back up the source pointer! */
            result = targetExhausted; break;
        }
        if (ch <= UNI_MAX_LEGAL_UTF32) {
            /*
             * UTF-16 surrogate values are illegal in UTF-32, and anything
             * over Plane 17 (> 0x10FFFF) is illegal.
             */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = ch;
            }
        } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
            result = sourceIllegal;
            *target++ = UNI_REPLACEMENT_CHAR;
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Note A.
|
||||
The fall-through switches in UTF-8 reading code save a
|
||||
temp variable, some decrements & conditionals. The switches
|
||||
are equivalent to the following loop:
|
||||
{
|
||||
int tmpBytesToRead = extraBytesToRead+1;
|
||||
do {
|
||||
ch += *source++;
|
||||
--tmpBytesToRead;
|
||||
if (tmpBytesToRead) ch <<= 6;
|
||||
} while (tmpBytesToRead > 0);
|
||||
}
|
||||
In UTF-8 writing code, the switches on "bytesToWrite" are
|
||||
similarly unrolled loops.
|
||||
|
||||
--------------------------------------------------------------------- */
|
||||
|
|
@ -1,149 +0,0 @@
|
|||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Header file.
|
||||
|
||||
Several functions are included here, forming a complete set of
|
||||
conversions between the three formats. UTF-7 is not included
|
||||
here, but is handled in a separate source file.
|
||||
|
||||
Each of these routines takes pointers to input buffers and output
|
||||
buffers. The input buffers are const.
|
||||
|
||||
Each routine converts the text between *sourceStart and sourceEnd,
|
||||
putting the result into the buffer between *targetStart and
|
||||
targetEnd. Note: the end pointers are *after* the last item: e.g.
|
||||
*(sourceEnd - 1) is the last item.
|
||||
|
||||
The return result indicates whether the conversion was successful,
|
||||
and if not, whether the problem was in the source or target buffers.
|
||||
(Only the first encountered problem is indicated.)
|
||||
|
||||
After the conversion, *sourceStart and *targetStart are both
|
||||
updated to point to the end of last text successfully converted in
|
||||
the respective buffers.
|
||||
|
||||
Input parameters:
|
||||
sourceStart - pointer to a pointer to the source buffer.
|
||||
The contents of this are modified on return so that
|
||||
it points at the next thing to be converted.
|
||||
targetStart - similarly, pointer to pointer to the target buffer.
|
||||
sourceEnd, targetEnd - respectively pointers to the ends of the
|
||||
two buffers, for overflow checking only.
|
||||
|
||||
These conversion functions take a ConversionFlags argument. When this
|
||||
flag is set to strict, both irregular sequences and isolated surrogates
|
||||
will cause an error. When the flag is set to lenient, both irregular
|
||||
sequences and isolated surrogates are converted.
|
||||
|
||||
Whether the flag is strict or lenient, all illegal sequences will cause
|
||||
an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
|
||||
or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
|
||||
must check for illegal sequences.
|
||||
|
||||
When the flag is set to lenient, characters over 0x10FFFF are converted
|
||||
to the replacement character; otherwise (when the flag is set to strict)
|
||||
they constitute an error.
|
||||
|
||||
Output parameters:
|
||||
The value "sourceIllegal" is returned from some routines if the input
|
||||
sequence is malformed. When "sourceIllegal" is returned, the source
|
||||
value will point to the illegal value that caused the problem. E.g.,
|
||||
in UTF-8 when a sequence is malformed, it points to the start of the
|
||||
malformed sequence.
|
||||
|
||||
Author: Mark E. Davis, 1994.
|
||||
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||
Fixes & updates, Sept 2001.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
The following 4 definitions are compiler-specific.
|
||||
The C standard does not guarantee that wchar_t has at least
|
||||
16 bits, so wchar_t is no less portable than unsigned short!
|
||||
All should be unsigned values to avoid sign extension during
|
||||
bit mask & shift operations.
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
/* Code-unit types used by the conversion routines. */
typedef unsigned int    UTF32;   /* at least 32 bits */
typedef unsigned short  UTF16;   /* at least 16 bits */
typedef unsigned char   UTF8;    /* typically 8 bits */
typedef unsigned char   Boolean; /* 0 or 1 */

/*
 * Some fundamental constants.
 *
 * Each replacement list is fully parenthesized so the cast stays attached
 * to its literal wherever the macro is expanded (for example after
 * `sizeof` or before a postfix operator).
 */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD)
#define UNI_MAX_BMP          ((UTF32)0x0000FFFF)
#define UNI_MAX_UTF16        ((UTF32)0x0010FFFF)
#define UNI_MAX_UTF32        ((UTF32)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32  ((UTF32)0x0010FFFF)

/* Status returned by the conversion routines. */
typedef enum {
    conversionOK,       /* conversion successful */
    sourceExhausted,    /* partial character in source, but hit end */
    targetExhausted,    /* insuff. room in target for conversion */
    sourceIllegal       /* source sequence is illegal/malformed */
} ConversionResult;

/* Selects strict or lenient handling, passed as the `flags` argument. */
typedef enum {
    strictConversion = 0,
    lenientConversion
} ConversionFlags;
|
||||
|
||||
/* This is for C++ and does no harm in C */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF16toUTF8 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF8toUTF32 (
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF32toUTF8 (
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF16toUTF32 (
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF32toUTF16 (
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue