PinyinUtils优化

This commit is contained in:
thinkgem
2018-07-14 20:33:45 +08:00
parent 7da392e574
commit 638c57b9c0
2 changed files with 99 additions and 41 deletions

View File

@@ -1,9 +1,12 @@
package com.jeesite.common.text;
import java.util.regex.Pattern;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
@@ -12,34 +15,16 @@ import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombi
*/
public class PinyinUtils {
// /**
// * 将字符串中的中文转化为拼音,其他字符不变
// * @param inputString
// * @return
// */
// public static String getPinyin(String inputString) {
// HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
// format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
// format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
// format.setVCharType(HanyuPinyinVCharType.WITH_V);
//
// char[] input = inputString.trim().toCharArray();
// String output = "";
//
// try {
// for (int i = 0; i < input.length; i++) {
// if (java.lang.Character.toString(input[i]).matches("[\\u4E00-\\u9FA5]+")) {
// String[] temp = PinyinHelper.toHanyuPinyinStringArray(input[i], format);
// output += temp[0];
// } else {
// output += java.lang.Character.toString(input[i]);
// }
// }
// } catch (BadHanyuPinyinOutputFormatCombination e) {
// e.printStackTrace();
// }
// return output;
// }
/**
 * Holder for objects that are built once and shared by every conversion call,
 * so they are not re-created on each invocation.
 */
private static class Static{
    /** Matches any single non-word character ({@code \W}); used to strip such characters from results. */
    private static final Pattern idPatt = Pattern.compile("\\W");

    /** Output format handed to pinyin4j: lowercase, no tone marks, V-char type WITH_V. */
    private static final HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();

    static{
        // Configure the shared format exactly once, at class initialization.
        defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        defaultFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
    }
}
/**
* 获取汉字串拼音首字母,替换掉非法标识符字符,英文字符不变,去除空格
@@ -47,17 +32,30 @@ public class PinyinUtils {
* @return 汉语拼音首字母
*/
public static String getFirstSpell(String chinese) {
    // Convenience overload: always requests identifier mode from the
    // two-argument variant.
    final boolean asIdentifier = true;
    return getFirstSpell(chinese, asIdentifier);
}
/**
* 获取汉字串拼音首字母,替换掉非法标识符字符,英文字符不变,去除空格
* @param chinese 汉字串
* @param isId 是否标识符,true 将去掉特殊字符
* @return 汉语拼音首字母
*/
public static String getFirstSpell(String chinese, boolean isId) {
chinese = getDbc(chinese);
if (chinese == null){
return null;
}
StringBuffer pybf = new StringBuffer();
char[] arr = chinese.toCharArray();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
for (int i = 0; i < arr.length; i++) {
if (arr[i] > 128) {
try {
String[] temp = PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat);
String[] temp = PinyinHelper.toHanyuPinyinStringArray(arr[i], Static.defaultFormat);
if (temp != null) {
pybf.append(temp[0].charAt(0));
}else{
pybf.append(String.valueOf(arr[i]));
}
} catch (BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
@@ -66,7 +64,10 @@ public class PinyinUtils {
pybf.append(arr[i]);
}
}
return pybf.toString().replaceAll("\\W", "").trim();
if (isId){
return Static.idPatt.matcher(pybf.toString()).replaceAll("").trim();
}
return pybf.toString();
}
/**
@@ -75,17 +76,30 @@ public class PinyinUtils {
* @return 汉语拼音
*/
public static String getFullSpell(String chinese) {
    // Convenience overload: always requests identifier mode from the
    // two-argument variant.
    final boolean asIdentifier = true;
    return getFullSpell(chinese, asIdentifier);
}
/**
* 获取汉字串全拼,英文字符不变
* @param chinese 汉字串
* @param isId 是否标识符,true 将去掉特殊字符
* @return 汉语拼音
*/
public static String getFullSpell(String chinese, boolean isId) {
chinese = getDbc(chinese);
if (chinese == null){
return null;
}
StringBuffer pybf = new StringBuffer();
char[] arr = chinese.toCharArray();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
for (int i = 0; i < arr.length; i++) {
if (arr[i] > 128) {
try {
String[] ss = PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat);
String[] ss = PinyinHelper.toHanyuPinyinStringArray(arr[i], Static.defaultFormat);
if (ss != null && ss.length > 0){
pybf.append(ss[0]);
}else{
pybf.append(String.valueOf(arr[i]));
}
} catch (BadHanyuPinyinOutputFormatCombination e) {
e.printStackTrace();
@@ -94,13 +108,57 @@ public class PinyinUtils {
pybf.append(arr[i]);
}
}
if (isId){
return Static.idPatt.matcher(pybf.toString()).replaceAll("").trim();
}
return pybf.toString();
}
/**
 * Converts half-width (ASCII) characters to their full-width forms.
 * <p>
 * An ASCII space becomes the ideographic space (U+3000). Each printable ASCII
 * character from {@code '!'} to {@code '~'} is shifted up by 65248 (0xFEE0)
 * into the range U+FF01..U+FF5E. Every other character, including control
 * characters such as tab and newline, is left unchanged, because it has no
 * full-width counterpart. This is the exact inverse of the range handled by
 * {@code getDbc}.
 *
 * @param input the string to convert; may be {@code null}
 * @return the full-width string, or {@code null} if {@code input} is {@code null}
 */
public static String getSbc(String input) {
    if (input == null){
        return null;
    }
    char[] c = input.toCharArray();
    for (int i = 0; i < c.length; i++) {
        if (c[i] == ' ') {
            c[i] = '\u3000';
        } else if (c[i] >= '!' && c[i] <= '~') {
            // Only printable ASCII has a full-width form; shifting control
            // characters would produce unrelated code points.
            c[i] = (char) (c[i] + 65248);
        }
    }
    return new String(c);
}
/**
 * Converts full-width characters in a string to their half-width forms.
 * <p>
 * The ideographic space (U+3000) becomes an ASCII space, and each character
 * strictly between U+FF00 and U+FF5F is shifted down by 65248 (0xFEE0).
 * All other characters are copied through unchanged.
 *
 * @param input the string to convert; may be {@code null}
 * @return the half-width string, or {@code null} if {@code input} is {@code null}
 */
public static String getDbc(String input) {
    if (input == null) {
        return null;
    }
    final int fullWidthOffset = 65248;
    StringBuilder halfWidth = new StringBuilder(input.length());
    for (char ch : input.toCharArray()) {
        if (ch == '\u3000') {
            halfWidth.append(' ');
        } else if (ch >= '\uFF01' && ch <= '\uFF5E') {
            halfWidth.append((char) (ch - fullWidthOffset));
        } else {
            halfWidth.append(ch);
        }
    }
    return halfWidth.toString();
}
// public static void main(String[] args) {
// String str = "你好123世界abc,~!#$_Sdf";
//// System.out.println(getPinyin(str));
// String str = "你好123世界abc,~!#$_Sdf,女;hello!-";
// System.out.println(getFirstSpell(str));
// System.out.println(getFirstSpell(str, false));
// System.out.println(getFullSpell(str));
// System.out.println(getFullSpell(str, false));
// }
}

View File

@@ -160,7 +160,7 @@ user:
account:
# 注册用户
registerUser:
enabled: true
enabled: false
userTypes: 0, 1
# 任务调度(个人版+)
@@ -369,7 +369,7 @@ web:
validator:
id: '[a-zA-Z0-9_\-/\u4e00-\u9fa5]{0,64}'
user.loginCode: '[a-zA-Z0-9_\u4e00-\u9fa5]{4,20}'
# 错误页面500.html是否输出错误信息(正式环境,为提供安全性可设置为false)
error:
page: