概念参考:https://blog.csdn.net/zbc1090549839/article/details/44103801

线性归一化

公式:X(norm) = (X - min) / (max - min)

Java 实现:
/**
 * Min-max (linear) normalization, applied to each column independently:
 * {@code x' = (x - min) / (max - min)}.
 *
 * <p>A column whose minimum equals its maximum cannot be rescaled; every
 * entry of such a column is set to the column minimum instead.
 *
 * @param points source matrix, indexed as {@code points[row][column]}; the
 *               column count is taken from the first row
 * @return a new matrix holding the normalized values, or {@code points}
 *         itself when it is {@code null} or has no rows
 */
public static double[][] normalize4Scale(double[][] points) {
    if (points == null || points.length == 0) {
        return points;
    }
    final int rowCount = points.length;
    final int colCount = points[0].length;
    final double[][] scaled = new double[rowCount][colCount];
    for (int col = 0; col < colCount; col++) {
        final double[] column = getMatrixCol(points, col);
        final double high = maxV(column);
        final double low = minV(column);
        if (high == low) {
            // Degenerate column: the formula would divide by zero, so fall
            // back to the column minimum for every row.
            for (int row = 0; row < rowCount; row++) {
                scaled[row][col] = low;
            }
            continue;
        }
        final double span = high - low;
        for (int row = 0; row < rowCount; row++) {
            scaled[row][col] = (column[row] - low) / span;
        }
    }
    return scaled;
}

/**
 * Copies a single column out of a matrix.
 *
 * @param points matrix indexed as {@code points[row][column]}
 * @param column zero-based index of the column to copy
 * @return a new array with one entry per row, holding that row's value at
 *         {@code column}
 */
public static double[] getMatrixCol(double[][] points, int column) {
    final double[] values = new double[points.length];
    int row = 0;
    for (double[] line : points) {
        values[row] = line[column];
        row++;
    }
    return values;
}

/**
 * Finds the smallest value in an array.
 *
 * @param matrixJ values to scan; must contain at least one element
 * @return the smallest element of {@code matrixJ}
 */
public static double minV(double[] matrixJ) {
    double smallest = matrixJ[0];
    for (double candidate : matrixJ) {
        if (candidate < smallest) {
            smallest = candidate;
        }
    }
    return smallest;
}

/**
 * Finds the largest value in an array.
 *
 * @param matrixJ values to scan; must contain at least one element
 * @return the largest element of {@code matrixJ}
 */
public static double maxV(double[] matrixJ) {
    double largest = matrixJ[0];
    for (double candidate : matrixJ) {
        if (candidate > largest) {
            largest = candidate;
        }
    }
    return largest;
}

0均值/标准差归一化(Z-score 标准化)

公式:X(norm) = (X - μ) / σ = (X - 均值) / 标准差

Java 实现:
/**
 * Zero-mean / unit-deviation (z-score) normalization, applied to each
 * column independently: {@code x' = (x - mean) / standardDeviation}.
 *
 * <p>A column whose standard deviation is zero cannot be rescaled; its
 * entries are copied through unchanged.
 *
 * @param points source matrix, indexed as {@code points[row][column]}; the
 *               column count is taken from the first row
 * @return a new matrix holding the normalized values, or {@code points}
 *         itself when it is {@code null} or has no rows
 */
public static double[][] normalize4ZScore(double[][] points) {
    if (points == null || points.length == 0) {
        return points;
    }
    final int rowCount = points.length;
    final int colCount = points[0].length;
    final double[][] standardized = new double[rowCount][colCount];
    for (int col = 0; col < colCount; col++) {
        final double[] column = getMatrixCol(points, col);
        final double mean = average(column);
        final double sigma = standardDeviation(column);
        for (int row = 0; row < rowCount; row++) {
            final double raw = column[row];
            if (sigma == 0) {
                // No spread in this column: keep the raw value rather than divide by zero.
                standardized[row][col] = raw;
            } else {
                standardized[row][col] = (raw - mean) / sigma;
            }
        }
    }
    return standardized;
}

/**
 * Population variance: the mean of the squared deviations from the mean,
 * {@code s^2 = [(x1 - mean)^2 + ... + (xn - mean)^2] / n}.
 *
 * @param x sample values
 * @return the variance of {@code x}, dividing by the number of elements
 */
public static double variance(double[] x) {
    final int count = x.length;

    // First pass: arithmetic mean.
    double total = 0;
    for (double value : x) {
        total += value;
    }
    final double mean = total / count;

    // Second pass: accumulate squared deviations from that mean.
    double squaredDeviations = 0;
    for (double value : x) {
        squaredDeviations += (value - mean) * (value - mean);
    }
    return squaredDeviations / count;
}

/**
 * Standard deviation, computed as the square root of {@code variance(x)}.
 *
 * @param x sample values
 * @return the standard deviation of {@code x}
 */
public static double standardDeviation(double[] x) {
    final double spread = variance(x);
    return Math.sqrt(spread);
}

/**
 * Arithmetic mean of an array.
 *
 * @param x values to average
 * @return the sum of the elements divided by the element count
 */
public static double average(double[] x) {
    final int count = x.length;
    double total = 0;
    for (double value : x) {
        total += value;
    }
    return total / count;
}

调用

调用示例(Java):
public static void main(String[] args) {
double[][] points = {{2, 5, 7}, {3, 1, 5}, {0, 27, 11}, {109, 6, 1}};
double[][] p1 = normalize4Scale(points);
double[][] p2 = normalize4ZScore(points);
}