WEB开发网
开发学院软件开发VC JNI中文处理问题小结 阅读

JNI中文处理问题小结

 2007-03-15 21:53:28 来源:WEB开发网   
核心提示: 下面重点讨论JNI中在C++程序与Java程序间进行数据传递时需要注意的问题。在JNI中jstring采用的是UCS-2编码,与Java中String的编码方式一致;但是在C++中,字符串是用char(8位)或者wchar_t(16位)来表示的,因此两者之间需要进行转换。

下面重点讨论JNI中在C++程序与Java程序间进行数据传递时需要注意的问题。

在JNI中jstring采用的是UCS-2编码,与Java中String的编码方式一致。但是在C++中,字符串是用char(8位)或者wchar_t(16位,Unicode编码与jchar一致,但并非所有开发平台上都是Unicode编码,详见参考6)来表示的。下面的程序证明了这一点(编译环境:VC6):

#include <iostream>
using namespace std;
int main()
{
  // Build the locale from the name "Chinese-simplified". The original listing
  // also kept three alternative names commented out: "chs", "ZHI" and ".936".
  const locale chineseLocale( "Chinese-simplified" );
  wcout.imbue( chineseLocale );

  // The L prefix makes this a wide literal; the author notes that output
  // goes wrong without it.
  wcout << L"中文" << endl;

  // The same two characters written as their Unicode code values,
  // followed by a zero terminator.
  const wchar_t codeUnits[] = { 0x4E2D, 0x6587, 0x0 };
  wcout << codeUnits << endl;

  return 0;
}
JNI提供了几个方法来实现jstring与char/wchar_t之间的转换。
jsize GetStringLength(jstring str)
const jchar *GetStringChars(jstring str, jboolean *isCopy)
void ReleaseStringChars(jstring str, const jchar *chars)
此外,为了便于以UTF-8方式进行传输、存储,JNI还提供了几个操作UTF格式的方法:
jsize GetStringUTFLength(jstring str)
const char* GetStringUTFChars(jstring str, jboolean *isCopy)
void ReleaseStringUTFChars(jstring str, const char* chars)
GetStringChars返回的是Unicode格式的编码串,而GetStringUTFChars返回的是UTF-8格式的编码串。要创建一个jstring,可以用如下方式:
// Creates a new Java string from a NUL-terminated string encoded in the
// system ANSI code page (CP_ACP).
//
//   env  JNI environment of the calling native method.
//   str  Source text. NOTE(review): the body calls strlen() on it, so this
//        assumes a non-UNICODE build where LPCTSTR is const char* -- confirm.
//
// Returns the new jstring, or 0 when env or str is null. If the code page
// conversion fails, an empty Java string is returned.
jstring NewJString( JNIEnv * env, LPCTSTR str )
{
  if (!env || !str)
    return 0;

  // Measure the input once and reuse the value for both the input length
  // and the output capacity.
  const int slen = static_cast<int>(strlen(str));

  // The wide result never needs more code units than the input has bytes.
  // One extra slot guarantees room for the terminator, even for "".
  jchar * buffer = new jchar[slen + 1];

  int len = 0;
  if (slen > 0)
  {
    // MultiByteToWideChar fails on a zero-length input, so empty strings
    // skip the call. jchar and WCHAR are both 16-bit on Windows but are
    // distinct types when wchar_t is a native type, hence the cast.
    len = MultiByteToWideChar(CP_ACP, 0, str, slen,
                              reinterpret_cast<LPWSTR>(buffer), slen);
  }
  buffer[len] = 0;

  // NewString takes an explicit length, so the terminator is only a safety net.
  jstring js = env->NewString(buffer, len);
  delete [] buffer;
  return js;
}
而要将一个jstring对象转为一个char字符串数组,可以:
// Converts a Java string into a NUL-terminated string in the system ANSI
// code page (CP_ACP), written into a caller-supplied buffer.
//
//   env       JNI environment of the calling native method.
//   str       Java string to convert.
//   desc      Destination buffer. NOTE(review): it is treated as a char
//             buffer (memset/strlen), so this assumes a non-UNICODE build
//             where LPTSTR is char* -- confirm.
//   desc_len  Size of desc; must be at least GetStringLength(str) * 2 + 1.
//
// Returns the length of the converted string (as reported by strlen), or
//   -1  if env, str or desc is null,
//   -2  if desc is too small,
//   -3  if the JVM could not provide the string's characters.
int JStringToChar( JNIEnv * env, jstring str, LPTSTR desc, int desc_len )
{
  if (env == NULL || desc == NULL || str == NULL)
    return -1;

  // The buffer returned by GetStringChars is not guaranteed to be
  // NUL-terminated, so the length must come from the JVM, not from wcslen.
  const int wlen = env->GetStringLength(str);

  // Check buffer size
  if (wlen * 2 + 1 > desc_len)
  {
    return -2;
  }
  memset(desc, 0, desc_len);

  // WideCharToMultiByte fails on a zero-length input; desc is already "".
  if (wlen == 0)
    return 0;

  const jchar * w_buffer = env->GetStringChars(str, NULL);
  if (w_buffer == NULL)
    return -3;

  // Convert exactly wlen code units and keep the last byte of desc free for
  // the terminator. On failure the call returns 0 and desc stays empty.
  const int len = WideCharToMultiByte(CP_ACP, 0,
                                      reinterpret_cast<LPCWSTR>(w_buffer), wlen,
                                      desc, desc_len - 1, NULL, NULL);
  env->ReleaseStringChars(str, w_buffer);

  desc[len] = 0;
  return static_cast<int>(strlen(desc));
}
  当然,按照上面的分析,你也可以直接将GetStringChars的返回结果作为wchar_t串来进行操作。或者,如果你愿意,你也可以将GetStringUTFChars的结果通过MultiByteToWideChar转换为UCS2编码串,再通过WideCharToMultiByte转换为多字节串。const char* pstr = env->GetStringUTFChars(str, false);
int nLen = MultiByteToWideChar( CP_UTF8, 0, pstr, -1, NULL, NULL );//得到UTF-8编码的字符串长度
LPWSTR lpwsz = new WCHAR[nLen];  
MultiByteToWideChar( CP_UTF8, 0, pstr, -1, lpwsz, nLen );//转换的结果是UCS2格式的编码串
int nLen1 = WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, NULL, NULL, NULL, NULL );  
LPSTR lpsz = new CHAR[nLen1];
WideCharToMultiByte( CP_ACP, 0, lpwsz, nLen, lpsz, nLen1, NULL, NULL );//将UCS2格式的编码串转换为多字节
cout << "Out:" << lpsz << endl;
delete [] lpwsz; delete [] lpsz;
  当然,我相信很少有人想要或者需要这么做。这里需要注意一点,GetStringChars的返回值是jchar,而GetStringUTFChars的返回值是const char*。除了上面的办法外,当需要经常在jstring和char*之间进行转换时我们还有一个选择,那就是下面的这个类。这个类本来是一个叫Roger S. Reynolds的老外提供的,想法非常棒,但用起来却不太灵光,因为作者将考虑的重心放在UTF-8格式串上,但在实际操作中,我们往往使用的却是ACP(ANSI code page)串。下面是原作者的程序:
// Pairs a jstring with its UTF-8 characters and a std::string copy of them,
// so one object can be passed wherever a jstring, string or const char* is
// expected.
//
// The instance holds the characters obtained from GetStringUTFChars until it
// is destroyed or reassigned, and releases them exactly once. Copying or
// assigning creates a new Java string via NewStringUTF.
class UTFString {
private:
  UTFString (); // Default ctor - disallowed
public:
  // Create a new instance from the specified jstring.
  // A null jstring yields an empty value.
  UTFString(JNIEnv* env, const jstring& str) :
    mEnv (env),
    mJstr (str),
    mUtfChars (Pin (env, str)),
    mString (OrEmpty (mUtfChars)) { }
  // Create a new instance from the specified string
  UTFString(JNIEnv* env, const string& str) :
    mEnv (env),
    mJstr (env->NewStringUTF (str.c_str ())),
    mUtfChars (Pin (env, mJstr)),
    mString (str) { }
  // Create a new instance as a copy of the specified UTFString
  UTFString(const UTFString& rhs) :
    mEnv (rhs.mEnv),
    mJstr (mEnv->NewStringUTF (OrEmpty (rhs.mUtfChars))),
    mUtfChars (Pin (mEnv, mJstr)),
    mString (OrEmpty (mUtfChars)) { }
  // Delete the instance and release allocated storage
  ~UTFString() { Unpin (); }
  // Assign from another UTFString. Without this operator the implicit
  // member-wise copy would make two instances release the same characters.
  UTFString & operator =(const UTFString& rhs) {
    if (this != &rhs) {
      Unpin ();          // release with the environment that pinned them
      mEnv = rhs.mEnv;
      Rebind (rhs.mUtfChars);
    }
    return *this;
  }
  // assign a new value to this instance from the given string
  UTFString & operator =(const string& rhs) {
    Unpin ();
    Rebind (rhs.c_str ());
    return *this;
  }
  // assign a new value to this instance from the given char*
  // (a null pointer is treated as an empty string)
  UTFString & operator =(const char* ptr) {
    Unpin ();
    Rebind (ptr);
    return *this;
  }
  // Supply operator methods for converting the UTFString to a string
  // or char*, making it easy to pass UTFString arguments to functions
  // that require string or char* parameters.
  string & GetString() { return mString; }
  operator string() { return mString; }
  operator const char* () { return mString.c_str (); }
  operator jstring() { return mJstr; }
private:
  // Obtain the UTF-8 characters of js, or NULL when js itself is null.
  static const char* Pin (JNIEnv* env, jstring js) {
    return js ? env->GetStringUTFChars (js, 0) : NULL;
  }
  // Map a null pointer to "" so it can safely initialise a std::string.
  static const char* OrEmpty (const char* s) { return s ? s : ""; }
  // Release the characters currently held, if any.
  void Unpin () {
    if (mUtfChars)
      mEnv->ReleaseStringUTFChars (mJstr, mUtfChars);
    mUtfChars = NULL;
  }
  // Create a new Java string from utf and make it this instance's value.
  // The caller must have called Unpin() first.
  void Rebind (const char* utf) {
    mJstr = mEnv->NewStringUTF (OrEmpty (utf));
    mUtfChars = Pin (mEnv, mJstr);
    mString = OrEmpty (mUtfChars);
  }

  // Members are initialised in this declaration order; the constructors'
  // initializer lists are written to match it.
  JNIEnv* mEnv;  // The enviroment pointer for this native method.
  jstring mJstr;  // A copy of the jstring object that this UTFString represents
  const char* mUtfChars; // Pointer to the data returned by GetStringUTFChars
  string mString; // string buffer for holding the "value" of this instance
};
我将它改了改:
// Pairs a jstring with a std::string holding the same text in the system
// ANSI code page (CP_ACP), so one object can be passed wherever a jstring,
// string or const char* is expected.
//
// Construction, copying and assignment each create a new Java string through
// NewString (a deep copy). The only other state is the std::string, so no
// destructor is needed.
class JNIString {
private:
  JNIString (); // Default ctor - disallowed
public:
  // Create a new instance from the specified jstring.
  // A null jstring, or a failure to obtain its characters, yields an
  // empty value and a null jstring.
  JNIString(JNIEnv* env, const jstring& str) :
    mEnv (env),
    mJstr (NULL) {
    if (str == NULL)
      return;
    // The buffer from GetStringChars is not guaranteed to be NUL-terminated,
    // so the length is taken from the JVM rather than from wcslen.
    const jsize wlen = env->GetStringLength (str);
    const jchar* w_buffer = env->GetStringChars (str, NULL);
    if (w_buffer == NULL)
      return;
    // Deep Copy, in usual case we only need Shallow Copy as we just need
    // this class to provide some convenience for handling jstring
    mJstr = env->NewString (w_buffer, wlen);
    mString = WideToAnsi (w_buffer, wlen);
    env->ReleaseStringChars (str, w_buffer);
  }
  // Create a new instance from the specified string
  JNIString(JNIEnv* env, const string& str) :
    mEnv (env),
    mJstr (AnsiToJString (env, str)),
    mString (str.c_str ()) { }
  // Create a new instance as a copy of the specified JNIString
  JNIString(const JNIString& rhs) :
    mEnv (rhs.mEnv),
    mJstr (CloneJString (rhs.mEnv, rhs.mJstr)),
    mString (rhs.mString) { }
  // Assign from another JNIString, deep-copying the Java string just as the
  // copy constructor does.
  JNIString & operator =(const JNIString& rhs) {
    if (this != &rhs) {
      mEnv = rhs.mEnv;
      mJstr = CloneJString (mEnv, rhs.mJstr);
      mString = rhs.mString;
    }
    return *this;
  }
  // assign a new value to this instance from the given string
  JNIString & operator =(const string& rhs) {
    mJstr = AnsiToJString (mEnv, rhs);
    // rhs may be this instance's own buffer, obtained through GetString()
    if (&rhs != &mString)
      mString = rhs.c_str ();
    return *this;
  }
  // Supply operator methods for converting the JNIString to a string
  // or char*, making it easy to pass JNIString arguments to functions
  // that require string or char* parameters.
  string & GetString() { return mString; }
  operator string() { return mString; }
  operator const char* () { return mString.c_str (); }
  operator jstring() { return mJstr; }
private:
  // Convert wlen UTF-16 code units to an ANSI code page string.
  // Returns an empty string for empty input or when the conversion fails.
  static string WideToAnsi (const jchar* wide, jsize wlen) {
    if (wide == NULL || wlen <= 0)
      return string ();
    // jchar and WCHAR are both 16-bit on Windows but are distinct types
    // when wchar_t is a native type, hence the cast.
    LPCWSTR src = reinterpret_cast<LPCWSTR> (wide);
    // Ask for the required size first instead of assuming 2 bytes per unit.
    const int need = WideCharToMultiByte (CP_ACP, 0, src, wlen, NULL, 0, NULL, NULL);
    if (need <= 0)
      return string ();
    char* bytes = new char[need + 1];
    const int written = WideCharToMultiByte (CP_ACP, 0, src, wlen, bytes, need,
                                             NULL, NULL);
    bytes[written] = 0;
    string result (bytes);
    delete [] bytes;
    return result;
  }
  // Create a new Java string from ANSI code page text.
  // Empty input, or a failed conversion, produces an empty Java string.
  static jstring AnsiToJString (JNIEnv* env, const string& ansi) {
    const int slen = static_cast<int> (ansi.length ());
    // The wide form never needs more code units than the input has bytes.
    jchar* wide = new jchar[slen + 1];
    int wlen = 0;
    if (slen > 0) // MultiByteToWideChar fails on a zero-length input
      wlen = MultiByteToWideChar (CP_ACP, 0, ansi.c_str (), slen,
                                  reinterpret_cast<LPWSTR> (wide), slen);
    wide[wlen] = 0;
    jstring js = env->NewString (wide, wlen);
    delete [] wide;
    return js;
  }
  // Create a new Java string with the same characters as src.
  // Returns NULL when src is null or its characters cannot be obtained.
  static jstring CloneJString (JNIEnv* env, jstring src) {
    if (src == NULL)
      return NULL;
    const jsize wlen = env->GetStringLength (src);
    const jchar* wide = env->GetStringChars (src, NULL);
    if (wide == NULL)
      return NULL;
    jstring copy = env->NewString (wide, wlen);
    env->ReleaseStringChars (src, wide);
    return copy;
  }

  JNIEnv* mEnv;  // The enviroment pointer for this native method.
  jstring mJstr; // A copy of the jstring object that this JNIString represents
  string mString; // string buffer for holding the "value" of this instance (ANSI code page)
};
  后者除了将面向UTF编码改成了面向ANSI编码外,还去掉了operator =(const char* ptr)的定义,因为 operator =(const string& rhs)可以在需要的时候替代前者而无需任何额外编码。(因为按照C++规范,const char*可以通过string的构造函数隐式转换为一个临时的string对象,而临时对象可以绑定到const reference上,详见本人另一文章“关于 const reference 的几点说明”)

上一页  1 2 3 4  下一页

Tags:JNI处理 问题

编辑录入:爽爽 [复制链接] [打 印]
赞助商链接