1143 lines
43 KiB
Plaintext
1143 lines
43 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:25:11.758935Z",
|
||
"start_time": "2025-05-14T06:25:11.434128Z"
|
||
}
|
||
},
|
||
"source": "import pandas as pd",
|
||
"outputs": [],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:25:41.010808Z",
|
||
"start_time": "2025-05-14T06:25:40.997687Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"data = pd.read_csv('./data/shill_bidding.csv',encoding='gbk')\n",
|
||
"data"
|
||
],
|
||
"id": "be73a1ac2c569ba7",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 记录ID 拍卖ID 竞标者倾向 竞标比率 连续竞标 上次竞标 竞标量 拍卖起拍 \\\n",
|
||
"0 1 732 0.200000 0.400000 0.0 0.000028 0.000000 0.993593 \n",
|
||
"1 2 732 0.024390 0.200000 0.0 0.013123 0.000000 0.993593 \n",
|
||
"2 3 732 0.142857 0.200000 0.0 0.003042 0.000000 0.993593 \n",
|
||
"3 4 732 0.100000 0.200000 0.0 0.097477 0.000000 0.993593 \n",
|
||
"4 5 900 0.051282 0.222222 0.0 0.001318 0.000000 0.000000 \n",
|
||
"... ... ... ... ... ... ... ... ... \n",
|
||
"6316 15129 760 0.333333 0.160000 1.0 0.738557 0.280000 0.993593 \n",
|
||
"6317 15137 2481 0.030612 0.130435 0.0 0.005754 0.217391 0.993593 \n",
|
||
"6318 15138 2481 0.055556 0.043478 0.0 0.015663 0.217391 0.993593 \n",
|
||
"6319 15139 2481 0.076923 0.086957 0.0 0.068694 0.217391 0.993593 \n",
|
||
"6320 15144 2481 0.016393 0.043478 0.0 0.340351 0.217391 0.993593 \n",
|
||
"\n",
|
||
" 早期竞标 胜率 拍卖持续时间(小时) 类别 \n",
|
||
"0 0.000028 0.666667 5 0 \n",
|
||
"1 0.013123 0.944444 5 0 \n",
|
||
"2 0.003042 1.000000 5 0 \n",
|
||
"3 0.097477 1.000000 5 0 \n",
|
||
"4 0.001242 0.500000 7 0 \n",
|
||
"... ... ... ... .. \n",
|
||
"6316 0.686358 0.888889 3 1 \n",
|
||
"6317 0.000010 0.878788 7 0 \n",
|
||
"6318 0.015663 0.000000 7 0 \n",
|
||
"6319 0.000415 0.000000 7 0 \n",
|
||
"6320 0.340351 0.000000 7 0 \n",
|
||
"\n",
|
||
"[6321 rows x 12 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>记录ID</th>\n",
|
||
" <th>拍卖ID</th>\n",
|
||
" <th>竞标者倾向</th>\n",
|
||
" <th>竞标比率</th>\n",
|
||
" <th>连续竞标</th>\n",
|
||
" <th>上次竞标</th>\n",
|
||
" <th>竞标量</th>\n",
|
||
" <th>拍卖起拍</th>\n",
|
||
" <th>早期竞标</th>\n",
|
||
" <th>胜率</th>\n",
|
||
" <th>拍卖持续时间(小时)</th>\n",
|
||
" <th>类别</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.400000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.024390</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.944444</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.142857</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.100000</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>900</td>\n",
|
||
" <td>0.051282</td>\n",
|
||
" <td>0.222222</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.001318</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.001242</td>\n",
|
||
" <td>0.500000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6316</th>\n",
|
||
" <td>15129</td>\n",
|
||
" <td>760</td>\n",
|
||
" <td>0.333333</td>\n",
|
||
" <td>0.160000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.738557</td>\n",
|
||
" <td>0.280000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.686358</td>\n",
|
||
" <td>0.888889</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6317</th>\n",
|
||
" <td>15137</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.030612</td>\n",
|
||
" <td>0.130435</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.005754</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000010</td>\n",
|
||
" <td>0.878788</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6318</th>\n",
|
||
" <td>15138</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.055556</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6319</th>\n",
|
||
" <td>15139</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.076923</td>\n",
|
||
" <td>0.086957</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.068694</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000415</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6320</th>\n",
|
||
" <td>15144</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.016393</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6321 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 5
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:41.114487Z",
|
||
"start_time": "2025-05-14T06:28:41.087602Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
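||
"X = data.iloc[:, :-1] # features: every column except the last one (类别); note this still keeps the 记录ID and 拍卖ID identifier columns\n",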
|
||
"X"
|
||
],
|
||
"id": "9c57d5b64979660f",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 记录ID 拍卖ID 竞标者倾向 竞标比率 连续竞标 上次竞标 竞标量 拍卖起拍 \\\n",
|
||
"0 1 732 0.200000 0.400000 0.0 0.000028 0.000000 0.993593 \n",
|
||
"1 2 732 0.024390 0.200000 0.0 0.013123 0.000000 0.993593 \n",
|
||
"2 3 732 0.142857 0.200000 0.0 0.003042 0.000000 0.993593 \n",
|
||
"3 4 732 0.100000 0.200000 0.0 0.097477 0.000000 0.993593 \n",
|
||
"4 5 900 0.051282 0.222222 0.0 0.001318 0.000000 0.000000 \n",
|
||
"... ... ... ... ... ... ... ... ... \n",
|
||
"6316 15129 760 0.333333 0.160000 1.0 0.738557 0.280000 0.993593 \n",
|
||
"6317 15137 2481 0.030612 0.130435 0.0 0.005754 0.217391 0.993593 \n",
|
||
"6318 15138 2481 0.055556 0.043478 0.0 0.015663 0.217391 0.993593 \n",
|
||
"6319 15139 2481 0.076923 0.086957 0.0 0.068694 0.217391 0.993593 \n",
|
||
"6320 15144 2481 0.016393 0.043478 0.0 0.340351 0.217391 0.993593 \n",
|
||
"\n",
|
||
" 早期竞标 胜率 拍卖持续时间(小时) \n",
|
||
"0 0.000028 0.666667 5 \n",
|
||
"1 0.013123 0.944444 5 \n",
|
||
"2 0.003042 1.000000 5 \n",
|
||
"3 0.097477 1.000000 5 \n",
|
||
"4 0.001242 0.500000 7 \n",
|
||
"... ... ... ... \n",
|
||
"6316 0.686358 0.888889 3 \n",
|
||
"6317 0.000010 0.878788 7 \n",
|
||
"6318 0.015663 0.000000 7 \n",
|
||
"6319 0.000415 0.000000 7 \n",
|
||
"6320 0.340351 0.000000 7 \n",
|
||
"\n",
|
||
"[6321 rows x 11 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>记录ID</th>\n",
|
||
" <th>拍卖ID</th>\n",
|
||
" <th>竞标者倾向</th>\n",
|
||
" <th>竞标比率</th>\n",
|
||
" <th>连续竞标</th>\n",
|
||
" <th>上次竞标</th>\n",
|
||
" <th>竞标量</th>\n",
|
||
" <th>拍卖起拍</th>\n",
|
||
" <th>早期竞标</th>\n",
|
||
" <th>胜率</th>\n",
|
||
" <th>拍卖持续时间(小时)</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.400000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.024390</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.944444</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.142857</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.100000</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>900</td>\n",
|
||
" <td>0.051282</td>\n",
|
||
" <td>0.222222</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.001318</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.001242</td>\n",
|
||
" <td>0.500000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6316</th>\n",
|
||
" <td>15129</td>\n",
|
||
" <td>760</td>\n",
|
||
" <td>0.333333</td>\n",
|
||
" <td>0.160000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.738557</td>\n",
|
||
" <td>0.280000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.686358</td>\n",
|
||
" <td>0.888889</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6317</th>\n",
|
||
" <td>15137</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.030612</td>\n",
|
||
" <td>0.130435</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.005754</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000010</td>\n",
|
||
" <td>0.878788</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6318</th>\n",
|
||
" <td>15138</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.055556</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6319</th>\n",
|
||
" <td>15139</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.076923</td>\n",
|
||
" <td>0.086957</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.068694</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000415</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6320</th>\n",
|
||
" <td>15144</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.016393</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6321 rows × 11 columns</p>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 9
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:43.271751Z",
|
||
"start_time": "2025-05-14T06:28:43.268190Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"y = data.iloc[:, -1] # labels: the last column (类别)\n",
|
||
"y"
|
||
],
|
||
"id": "cd9249ade901dbc2",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0 0\n",
|
||
"1 0\n",
|
||
"2 0\n",
|
||
"3 0\n",
|
||
"4 0\n",
|
||
" ..\n",
|
||
"6316 1\n",
|
||
"6317 0\n",
|
||
"6318 0\n",
|
||
"6319 0\n",
|
||
"6320 0\n",
|
||
"Name: 类别, Length: 6321, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 10
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:45.332753Z",
|
||
"start_time": "2025-05-14T06:28:45.330224Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "from sklearn.model_selection import train_test_split",
|
||
"id": "8dc41c605cd3157e",
|
||
"outputs": [],
|
||
"execution_count": 11
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:56.522996Z",
|
||
"start_time": "2025-05-14T06:28:56.503381Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)",
|
||
"id": "9f85e76994ed4850",
|
||
"outputs": [],
|
||
"execution_count": 13
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:29:39.357247Z",
|
||
"start_time": "2025-05-14T06:29:39.346520Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(\"训练集特征数量:\", len(X_train))\n",
|
||
"print(\"测试集特征数量:\", len(X_test))\n",
|
||
"print(\"训练集标签数量:\", len(y_train))\n",
|
||
"print(\"测试集标签数量:\", len(y_test))"
|
||
],
|
||
"id": "9e7a486068bba773",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"训练集特征数量: 5056\n",
|
||
"测试集特征数量: 1265\n",
|
||
"训练集标签数量: 5056\n",
|
||
"测试集标签数量: 1265\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 14
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:32:48.037308Z",
|
||
"start_time": "2025-05-14T06:32:48.032154Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "from sklearn.decomposition import PCA",
|
||
"id": "310a87e99029912f",
|
||
"outputs": [],
|
||
"execution_count": 16
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:16.933711Z",
|
||
"start_time": "2025-05-14T06:33:16.923897Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
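||
"source": "# A float n_components keeps as many principal components as needed to explain that fraction (99.9%) of the variance.\n# NOTE(review): X is not standardized and still contains 记录ID/拍卖ID, so the retained components are presumably dominated by those large-valued columns -- confirm, and consider dropping the IDs and scaling before PCA.\npca = PCA(n_components=0.999)",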
|
||
"id": "b2a983bb0cafe05e",
|
||
"outputs": [],
|
||
"execution_count": 17
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:18.638792Z",
|
||
"start_time": "2025-05-14T06:33:18.615773Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "X_train_pca = pca.fit_transform(X_train)",
|
||
"id": "1e4efd6f383f884d",
|
||
"outputs": [],
|
||
"execution_count": 18
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:19.872466Z",
|
||
"start_time": "2025-05-14T06:33:19.869305Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "X_test_pca = pca.transform(X_test)",
|
||
"id": "3726f7c7adee3b78",
|
||
"outputs": [],
|
||
"execution_count": 19
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:29.743054Z",
|
||
"start_time": "2025-05-14T06:33:29.740149Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(\"降维后训练集大小:\", X_train_pca.shape)\n",
|
||
"print(\"降维后测试集大小:\", X_test_pca.shape)"
|
||
],
|
||
"id": "3a492d1ae08c79d7",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"降维后训练集大小: (5056, 2)\n",
|
||
"降维后测试集大小: (1265, 2)\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 20
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:33.158908Z",
|
||
"start_time": "2025-05-14T06:34:33.156943Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.metrics import accuracy_score"
|
||
],
|
||
"id": "90b993d18fc3c8fd",
|
||
"outputs": [],
|
||
"execution_count": 22
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:34.787786Z",
|
||
"start_time": "2025-05-14T06:34:34.708147Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"model = LogisticRegression()\n",
|
||
"model.fit(X_train_pca, y_train)"
|
||
],
|
||
"id": "555998c0a33ff564",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"LogisticRegression()"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 23
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:44.692219Z",
|
||
"start_time": "2025-05-14T06:34:44.689245Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"y_pred = model.predict(X_test_pca)\n",
|
||
"accuracy = accuracy_score(y_test, y_pred)"
|
||
],
|
||
"id": "550e99b2717dd70a",
|
||
"outputs": [],
|
||
"execution_count": 24
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:48.357752Z",
|
||
"start_time": "2025-05-14T06:34:48.355434Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "print(f\"模型在测试集上的准确率: {accuracy * 100:.2f}%\")",
|
||
"id": "d371ece25527b933",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"模型在测试集上的准确率: 89.57%\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 25
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:35:35.499762Z",
|
||
"start_time": "2025-05-14T06:35:34.902637Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix\n",
|
||
"import seaborn as sns\n",
|
||
"import matplotlib.pyplot as plt"
|
||
],
|
||
"id": "30954934fe9605f0",
|
||
"outputs": [],
|
||
"execution_count": 26
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:35:44.931054Z",
|
||
"start_time": "2025-05-14T06:35:44.926161Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
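||
"source": "# NOTE(review): in the saved run this call emits UndefinedMetricWarning ('no predicted samples'), i.e. y_pred contains no positive class.\n# Precision/recall/F1 are therefore 0 and the accuracy printed earlier only reflects the share of the majority class; revisit the preprocessing/model before relying on it.\nprecision = precision_score(y_test, y_pred)",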
|
||
"id": "b1e217f2b2899fd5",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/Volumes/Data/Environment/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 27
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:35:53.539971Z",
|
||
"start_time": "2025-05-14T06:35:53.536968Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "recall = recall_score(y_test, y_pred)",
|
||
"id": "d319c92518dfa777",
|
||
"outputs": [],
|
||
"execution_count": 28
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:36:02.345297Z",
|
||
"start_time": "2025-05-14T06:36:02.342210Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "f1 = f1_score(y_test, y_pred)",
|
||
"id": "a99ded51edcc3c2a",
|
||
"outputs": [],
|
||
"execution_count": 29
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:36:11.818015Z",
|
||
"start_time": "2025-05-14T06:36:11.815129Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(f\"精确率: {precision * 100:.2f}%\")\n",
|
||
"print(f\"召回率: {recall * 100:.2f}%\")\n",
|
||
"print(f\"F1 值: {f1 * 100:.2f}%\")"
|
||
],
|
||
"id": "eeeb022d06ac0cdd",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"精确率: 0.00%\n",
|
||
"召回率: 0.00%\n",
|
||
"F1 值: 0.00%\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 30
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:36:20.417042Z",
|
||
"start_time": "2025-05-14T06:36:20.411479Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "cm = confusion_matrix(y_test, y_pred)",
|
||
"id": "2e7c7fcd91a7da1b",
|
||
"outputs": [],
|
||
"execution_count": 31
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:37:03.532720Z",
|
||
"start_time": "2025-05-14T06:37:03.505902Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from matplotlib import font_manager as fm\n",
|
||
"import matplotlib as mpl\n",
|
||
"\n",
|
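||
"font_path = '/System/Library/Fonts/STHeiti Medium.ttc'  # macOS-only system font location (needed for the Chinese plot labels); point this at an installed CJK font on other platforms\n",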
|
||
"my_font = fm.FontProperties(fname=font_path)\n",
|
||
"mpl.rcParams['font.family'] = my_font.get_name()\n",
|
||
"mpl.rcParams['axes.unicode_minus'] = False"
|
||
],
|
||
"id": "d6a1fd6bb39c2e86",
|
||
"outputs": [],
|
||
"execution_count": 33
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:37:05.154764Z",
|
||
"start_time": "2025-05-14T06:37:05.074128Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"plt.figure(figsize=(8, 6))\n",
|
||
"sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')\n",
|
||
"# annot=True writes the value inside each heatmap cell; fmt='d' formats it as an integer\n",
|
||
"plt.xlabel('预测标签')\n",
|
||
"plt.ylabel('真实标签')\n",
|
||
"plt.title('混淆矩阵')\n",
|
||
"plt.show()"
|
||
],
|
||
"id": "8fd3248dde46e851",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<Figure size 576x432 with 2 Axes>"
|
||
],
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAdQAAAGBCAYAAADfZEAoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgAUlEQVR4nO3deZQdZbnv8e/TSSchKIMJhkFlEoUgCkowETmO4AQqwgENR8WLJxAUD6iAV7iIKCAHFESDEECvIihIREYR0OtIIIlhuCqozIJMiQkoAobkOX9UNW6TdDrdu3ZXp/r7WatWdw1d9e6sJL9+3nrrrchMJElSe7rqboAkSU1goEqSVAEDVZKkChiokiRVwECVJKkCBqokSRUwUKUWETFyJdsiIkaU33cvt31cy/qWEbHHcj87IiJmRsTW5fqoiIjljhndcv7RrdvLrztHxIyeYyQNTQaqhr2IOCUiji2D7oqIuD0i5vUswAJgt/Lw4yLi0ogYD2wK/CwiLoiIQ4EfAZsvF8rbA28D7irXjwSejoiFEfFQRDwCLAK2K/f/JiIeiYi/AaeWAf5m4KnMXFq2d+0O/VFIasMKv41Lw9ApwA+Bh4GngIMy86c9OyPiu8CScvUY4CvAGzLze2WQngu8BHgSWAyMAJ4pj98NWB/4XVlhfgb4CXBsZt6wkrY8DUwEzgNuBX4LbAbcFhE3l+dehyLMJQ0hBqqGvcx8KCJ2BR4H3gKcFRF/bTlkC+Cc8vtRmXlQRIyMiGsoqssZFIH8DEVgPgr8sKwuDwDeBFwGvAL4OzC19foRMTozn27ZNB7YMDPPjIg/APsD3wYWZuavK/zokipkl6+GtYjYIiJ+DmyRmf8oNx+YmTv2LMA15bHjgT9ExKnAK4H7gAeBj1NUrgeU6++OiG2A9YAvALcBf8/MhzLz8fIa50fEbyLifuCs5Zq1B3BhWdEeC3wO2At4afV/ApKqYoWqYS0z74qIc4GLI2I7im7fkyNiacth3cA/MnNBRGwLvA64B/gmcCjwdeBCYBkwplz+DDyPoto8FFgrIg4HbinPuV9m3hAR+wNTlmvWFcCpwB3AJOBiYGNgt4g4CpiVmcdU9WcgqRoGqoa9zPxmRPw4Mx+LiMnAm8rwPJrinub5wAvLYxdHxF2Z+UhEvAnYE5gDvBEIiu7h/yjPtQFFuE6g6E4excr/zS3fU/QosBB4Atg4MxdFxJnATymCezSShhy7fDXsRcRYYHbZxboucHVEXFbunkTR5fuG8tgXAZdHxIeBT1KMAL66XH4I/IkihMnMOzLzWIrABfhqZl5Vfv/diLgdOIp/DmDq8TJgQ4rBTI+37sji9VD+IiwNQf7DlOBdwK8yc2n5iOhkiu7bT1OM+n0N/wy9XYFrKSrIXYH5wHUt53rVSs4/FZgLXFoOfgJ4by+jfIPiMZlFwIspRvc+BrwI2LWsmp+kCHpJQ4iBKsEhwBkt65+hqBDvAeZk5uMRMSciPgC8Dzg3My+BYnIHisduemzReuKIeDfwAorRwz8BDmS5nqGI6AK6y5G+I4DTMnNBufu48pizgJ9m5nfa/rSSOsIuXw1rEbEFxTOkP46IV1AE2kKKkbt3A8dHRM/zqQ9S/Jv5WcspnpuZkzNzMkVFu7BcKGdH+jowLTOXUYze3RR4LUW3cc/EDo8DHy3PN5aV/6K7FuCEDtIQFsUtGWn4ioi1M/OJ8vsxmfnUAM8zhuI259Mt217V+uxoRHSV4br8z0b6j1FaoxmokiRVwC5fSZIqYKBKklSBITfKd60dPmoftBph0dyv1t0EqRJjRhJ9H9V/7f5//+RNX+1IuwbKClWSpAoMuQpVkjRMRLNqOgNVklSPGFI9tm0zUCVJ9WhYhdqsTyNJUk2sUCVJ9bDLV5KkCjSsy9dAlSTVwwpVkqQKNKxCbdankSSpJlaokqR62OUrSVIFGtbla6BKkuphhSpJUgUaVqE269NIklQTK1RJUj
3s8pUkqQIN6/I1UCVJ9WhYoDbr00iSVBMrVElSPbq8hypJUvsa1uVroEqS6uEoX0mSKtCwCrVZn0aSpJpYoUqS6mGXryRJFWhYl6+BKkmqhxWqJEkVaFiF2qxPI0lSTaxQJUn1sMtXkqQKNKzL10CVJNWjYRVqs349kCSpJlaokqR6NKzLt1mfRpK05oiu9pbVuUTEPhFxf0TsXa5PiYgbI2JORMyIiO5yOSMi5kbE7IiY0tuxq7qWgSpJqkdEe8tqyMyLgHOKy0UXcBawb2buBPwNOADYrzg0JwF7A1+LiNG9HNsrA1WSVI9BqFCXsyVwV2beU67PAPYAdgXOBsjMB4D5wMt7ObZX3kOVJA0X44GHW9YfAiZQZOHy29/ay7G9skKVJNWjzS7fiJgWEfNalml9XHEBRaj22JAiNFe2/epeju2VFaokqR5tjvLNzJnAzH78yJ3AFhGxSdm1exBwOfAUsD/wiYjYCNgBuLWXY3tloEqS6jHIEztk5rKI+AgwKyICmAucW+4+PSLmAEuA6Zn59CqOXSkDVZJUixikQM3MY1u+vx6YvJLDpq/k53o7dqW8hypJUgWsUCVJtRisCnWwGKiSpHo0K08NVElSPZpWoXoPVZKkClihSpJq0bQK1UCVJNXCQJUkqQIGqiRJVWhWnjooSZKkKlihSpJqYZevJEkVMFAlSaqAgSpJUgWaFqgOSpIkqQJWqJKkejSrQDVQJUn1aFqXr4EqSapF0wLVe6iSJFXAClWSVIumVagGqiSpHs3KUwNVklQPK1RJkirQtEB1UJIkSRWwQpUk1aJpFaqBKkmqhYEqSVIVmpWn3kOVJKkKVqiSpFrY5StJUgUMVEmSKmCgSpJUhWblqYOSJEmqghWqJKkWdvlKklSBpgWqXb4Ns9euO3DH1Z9jzzdvD0D3yBFcfNqBzPvep589ZvSokXzj+A8y+ztH8uOvH8YrJ74IgN12nsjs7xzJL799OKccvlcdzZdWy5IlSzj+uGOZus9evH/qvtxy8011N0kDEBFtLUONgdows669iW9cMvvZ9SXPLGXvQ8/6l2NeuOHzuOS6m5jyvpP42AkXcuJhewKw4fh12OPgGezy/lPYcPy67PKqrQa17dLquuqKy4kILrhoFqecejrHH3csS5YsqbtZ6icDdTVExIiIeFVEvL38OqIT19HA3HHfI1z2/27llMP34sbvforrb7oTgG9degPPXXsM9/74BHbZcSv+eO/DNbdUWrkbZv+K9+y9DwATJkxg64kT+f3tt9XcKg13lQdqREwG5gHTgZ2Bg4E5EbFT1ddSez558iy2f8/neeyvTz677e77F7D5rkdx/JlX8bx1166xdVLvFi1azLjx455dHz9+AxYuXFhjizQg0eYyxHSiQv0i8I7M/HBmHpWZBwC7l9tXKiKmRcS8iJj3zILfdqBJarXjtpsybr0iLO+47xEmvngjXrbVxs928S5duoyrf/lbjj7o7XU2U+rV+uuvx+JFi55dX7DgUcaNG7eKn9BQZJdv37oz88+tGzLzQWBUbz+QmTMzc8fM3HHk+G070CS12narjTnsA28GYN3nrMWkl23GgkV/44tH7M2Ecc8FYPfXb8efH1lcYyul3k2esjOX/uASAB599BFuv+02Xrr1NjW3Sv3VtEDtxGMzcyPiK8DZwMPAeOBDwK87cC0NwHmX3cBXjnovvzjvk0QEnz3jCh5a8DiHnnghF31pGhHBfQ/+hYM/d0HdTZVW6u2778FJJ36eqfvuTffIkRx9zLF0d3fX3SwNc5GZ1Z4wYiSwH7ArRZg+ClwHnJ+Zz/T182vt8NFqGyTVZNHcr9bdBKkSY0Z25o7liz/5w7b+v7/jlLcNqTK18gq1DM1vloskSSs1FLtt2+FMSZKkWjQsTw1USVI9mlahOlOSJEkVsEKVJNWiYQWqgSpJqkdXV7MS1UCVJNXCClWSpAp0clBSRKwDnANsCTwFfLLcdRrFTMBzgUPLbV
8GJgHPAB/PzNkMgIEqSWqiKcBfgR2BzYCLgNHAOzPznog4CTiAImwzMydFxCbAlRExKTP7/T5AR/lKkmoR0d7Sh+uAbYE/A38AHgHuysx7yv0zgD0oZvU7GyAzHwDmA9sP5PMYqJKkWrQ7OX7rm8rKZVrL6Q8DbgQ2AV4C/JJifvkeDwETKKbIXdn2frPLV5JUi3bvoWbmTGBmL7vfCBycmcuAuyPiZcCYlv0bUgTpYopQfXC57f1mhSpJaqJbKLpziYj1KQYdvbK8TwpwEHA5cC2wf3ncRsAOwM0DuaAVqiSpFh1+bOYE4OyIOICiMv08cAcwK4rSeC5wbnns6RExB1gCTB/IgCQwUCVJNenkYzOZ+VfgvSvZNXkl26ZXcU0DVZJUCyd2kCSpAr5tRpIkrcAKVZJUi4YVqAaqJKkeTevyNVAlSbVoWJ4aqJKkejStQnVQkiRJFbBClSTVomEFqoEqSapH07p8DVRJUi0alqfeQ5UkqQpWqJKkWtjlK0lSBRqWpwaqJKkeVqiSJFWgaYHqoCRJkipghSpJqkXDClQDVZJUj6Z1+RqokqRaNCxPDVRJUj2aVqE6KEmSpApYoUqSatGwAtVAlSTVo6thiWqgSpJq0bA89R6qJElVsEKVJNWiaaN8DVRJUi26mpWnBqokqR5WqJIkVaBheeqgJEmSqmCFKkmqRdCsEtVAlSTVwkFJkiRVwEFJkiRVoGF56qAkSZKqYIUqSaqFk+NLklSBhuWpgSpJqkfTBiV5D1WSpApYoUqSatGwAtVAlSTVw0FJkiRVoFlxaqBKkmrioCRJkrSCPgM1It7e8v1bImKDzjZJkjQcdEV7y1CzOhXqYRHRXX7/WeAFPTsi4uqIGNWRlkmSGi0i2lqGmlUGalmNbgZ8MyLGAyOAtSLisIhYC3heZv6j882UJDVNRHvLUNNXhXoesCAzpwKfpgjU+4BngMuAjTvbPEmS1gx9BerbgAsi4gTgD8BiYBmwFPgP4O6Otk6S1FjDqss3MxPYG7gX2LzcvDkwiiJYn+po6yRJjTWYg5Ii4ryIeHlETImIGyNiTkTMiIjucjkjIuZGxOyImDKQz7PK51AjYjvgOcAOwJ+ATYCbgOcDOwHdvf+0JEm9G6wqMyKmA+sBvwFuBt6ZmfdExEnAARTFYWbmpIjYBLgyIiZl5pL+XKeviR1OBtYGDgYSeDPFKN9XARcA6/TnYpIk9RiMOI2IlwNHATsCWwJ3ZeY95e4ZwNcobmeeDJCZD0TEfGB7YG5/rtVXl+9bywueVnb/ngV8PDOPzszfAde0PFIjSdKgiYhpETGvZZm23P5u4Dvl6mXAe4GHWw55CJgAjO9le7/0OfVgZn4lIsaVq5cC81r2faq/F5QkCdqfHD8zZwIzV3HIm4A7KcYCjaUYSDu/Zf+GFEG6mCJUH1xue7+s7tSDj0XEv2Xmk5l5R0S8OiJ26e/FJEnqMQjPof4dWFTOl/AE8Edgi/I+KcBBwOXAtcD+RZtiI4pxQzf39/P0NbHDOeW3I4DTImJERHwBOBFH+EqS2tDpx2Yy8+fA/RExD7ie4rblfsCsiLiRYhzQucD5wNiImANcDEzv74Ak6LvLd4eyUU9HxD8oHpWZb1evJKldgzHINzOPohiU1GrySg6d3u61+grU1mkFXw78FiAiji1/9oHMfEO7jZAkaU3X1z3UMQAR0QX8JjMnAl8GJmXmS4DfOTm+JGkguiLaWoaavirUb5Rfx/DPavUFwI8i4jrgCCfHlyQNxBDMxLb09Rzq6RHxCuD2zHxtRBwHrAu8Hni6XCRJ6remzeXb19SDl5ffrhsRFwAbAI8C36II40uAPTraQkmS1gB9dfkeSjFbxLYUkzq8G1gIfB14gA5MPXj7dV+s+pSSpCFodSdCWFOsMlAz886IeBo4JzMvBC6MiH8HtsrMm4BHBqORkqTmGYrdtu1YnakH7wdOaNk0h+Il45IkDVh/X8E21PU1U1JExKKIuK
1l88+B7oh4QzlISZKkfhvM96EOhtV5wfjvMnObls2Ly0dlTgZ+2cnGSZK0puizyxdYGhEvBB7PzMeAv0bEO4FbM/OazjZPktRUw+YeakSsTTGadxRwCLBrRIwFNqKY3OHwQWmhJKmRhmK3bTtW1eX7d4rZ+Z/JzCMoRvq+FLiV4h1zUwehfZKkhhqE17cNql4DNQtfBjIi3g68MCJOBILirefvj4hNB6mdkqSGadpcvqvzXG0XRaV6MfA8YK3yPXFnA4d1sG2SJK0x+pp6MIBXAtdn5sSI+ANwa7n9KuAdg9BGSVIDDbeZkhJYq2X98Yh4H/CWzLw6It7T6QZKkpppCPbatqWviR12johprdsyczbwkYjYBPhAJxsnSWqupt1D7es51KXA2Ii4arljXwwcCMzvVMMkSVqT9NWF/Q9gCcWzqJcC3cD5FEG6c2b+oKOtkyQ1VtMem1nVxA4BfIwiUEcC2bIsA/YajAZKkpqpaRM7rKrLdxTFO0+3AEZQPH9Ky9dRHWyXJKnhhuJ90HasamKHp4HvU0yA/xRFZUr5dQRwTkSM6HgLJUmN1LQu377uoXaXy1Jgz/Lr+4EdgO8B+3a0dZIkrSH6GuXbBTyRmW9t3RgRVwKXUEyaL0lSvw2ne6hk5vUU0w4u74zM/BtwYkdaJUlqvKBZibo670NdQWZeWXVDJEnDy7CqUCVJ6pSmBWrT5iaWJKkWVqiSpFrEUHz2pQ0GqiSpFk3r8jVQJUm1aFiB6j1USZKqYIUqSapF0+byNVAlSbXwHqokSRVoWIFqoEqS6tHVsKkHHZQkSVIFrFAlSbWwy1eSpAo4KEmSpAr42IwkSRVoWJ46KEmSpCpYoUqSamGXryRJFWhYnhqokqR6NO2eY9M+jyRJtbBClSTVIhrW52ugSpJq0aw4NVAlSTUZrFG+EbEzsAPwa+A0iiyfCxxaHvJlYBLwDPDxzJw9kOt4D1WSVItoc1mta0RsBswCxgJnAftm5k7A34ADgP2AzMxJwN7A1yKieyCfx0CVJDVSRKwDfBv4JpDAXZl5T7l7BrAHsCtwNkBmPgDMB7YfyPUMVElSLSLaXWJaRMxrWab989wxAvgW8BngNmAM8HDL5R8CJgDje9neb95DlSTVot1Rvpk5E5jZy+5PANsBxwMbUATqGODAcv+GFEG6mCJUH1xue79ZoUqSatHV5rIqmfnfmbllZk4GPkcx8Oi+iNikPOQg4HLgWmB/gIjYiGLw0s0D+TxWqJKkWgzyc6jLgI8As6K48Fzg3HLf6RExB1gCTM/MJQO5gIEqSWq0zPy/LauTV3LI9CquY6BKkmrhxA6SJFXAqQclSapA00bFNu3zSJJUCytUSVIt7PKVJKkCzYpTA1WSVJOGFagGqiSpHl0Nq1EdlCRJUgWsUCVJtbDLV5KkCkTDunwNVElSLaxQJUmqgIOSJEnSCqxQJUm1sMtXkqQKGKiSJFWgaaN8vYcqSVIFrFAlSbXoalaBaqBKkurRtC5fA1WSVAsHJUmSVIGmVagOSpIkqQJWqJKkWjgoSZKkCtjlqzXGz378I6a+6838/CfXADBn9i+Y/sF9+Oj/eh9nnPoFMpOlzzzDqV/4LB/7z/045ICpzLvx+ppbLfVtyZIlHH/csUzdZy/eP3Vfbrn5prqbpAGIaG8ZaqxQG+x1b3oL995957Prf1m4gBNPO5N11l2PE445gltvmse9d9/JuPEbcNinPsNjixdx2EEf5JwLfkBXl79raei66orLiQguuGgWDz/8MIdMn8b5F15Md3d33U3TMOb/msPIW3ffk78/8QT77v4Gbpk/lxe8aDM233Ir9njPvgCsu976jBo9mqVLl9bcUmnVbpj9K96z9z4ATJgwga0nTuT3t99Wc6vUX9HmMtRYoQ4zG7/ghXz3sh9z5aUX8/hji9lu+1c9u+/C877OpMmv9bd8DXmLFi1m3Phxz66PH78BCxcurLFFGoiuodhv24YhUaFGxLSImB
cR8y745jl1N6eRnnrqSW6ZPxeAESNHstNrduG8c7/27P5LL/4Od93xez504CF1NVFabeuvvx6LFy16dn3BgkcZN27cKn5CQ1HTKtQhEaiZOTMzd8zMHad+8MN1N6eRRo8ewxmnfoG/LFwAwOxf/JTxGzwfgMtmfZff3DKfI/7P8d471Rph8pSdufQHlwDw6KOPcPttt/HSrbepuVXqt4Ylake6fCPiPb3ty8zvd+KaWrWI4JBPHsWxn/ovMpMJG27MYZ/6DJd//0JmfOlEXvzSbThs+gcBOPKYE9jkhZvW3GKpd2/ffQ9OOvHzTN13b7pHjuToY471VoVqF5lZ/Ukj/gSczYq/Q2RmHreqn7134dPVN0iqwYR1R9fdBKkSY0Z2ph688c7H2vr//tVbrjuk6tRODUr6eGZ+r0PnliQ1QMPGJHUmUA1TSVJfGpanPjYjSapJwxLVIZ2SJFXAClWSVIumTY5voEqSauGgJEmSKtCwPPUeqiRJVbBClSTVo2ElqoEqSaqFg5IkSaqAg5IkSapAw/LUQUmSJFXBClWSVI+GlagGqiSpFg5KkiSpAg5KkiSpAg3LUwclSZKaJyJGRsTMiLghIuZExG4RMSUibizXZ0REd7mcERFzI2J2REwZ6DWtUCVJ9ehsiXog8OfMnBYR44HrgaeAd2bmPRFxEnBAuS0zc1JEbAJcGRGTMnNJfy9ooEqSatHhQUm3An8AyMwFZVj+LDPvKffPAL4GLAZOLo97ICLmA9sDc/t7QQNVklSLdgclRcQ0YFrLppmZORMgM3/RctyRwBzgTy3HPgRMoMjBh1eyvd8MVEnSGqkMz5mrOiYiPgq8AjgIOKFl14YUQboYGA88uNz2fnNQkiSpFtHm0uf5Iw4GdgE+APwR2KLs+oUiYC8HrgX2L4/fCNgBuHkgn8cKVZJUjw7eQo2I6cBXgfnAL8vNhwKzIiIo7pGeW24/PSLmAEuA6QMZkAQQmdlWo6t278Knh1aDpAGasO7oupsgVWLMyM5E3+0P/r2t/++33mjskHqU1QpVklSLps2U5D1USZIqYIUqSapFwwpUA1WSVJOGJaqBKkmqha9vkySpAg5KkiRJK7BClSTVomEFqoEqSapJwxLVQJUk1aJpg5K8hypJUgWsUCVJtWjaKF8DVZJUi4blqYEqSapJwxLVQJUk1cJBSZIkaQVWqJKkWjgoSZKkCjQsTw1USVI9rFAlSapEsxLVQUmSJFXAClWSVAu7fCVJqkDD8tRAlSTVo2kVqvdQJUmqgBWqJKkWTZt60ECVJNWjWXlqoEqS6tGwPDVQJUn1cFCSJElagRWqJKkWDkqSJKkKzcpTA1WSVI+G5amBKkmqh4OSJEnSCqxQJUm1cFCSJEkVsMtXkiStwECVJKkCdvlKkmrRtC5fA1WSVAsHJUmSVAErVEmSKtCwPHVQkiRJVbBClSTVo2ElqoEqSaqFg5IkSapA0wYleQ9VkqQKWKFKkmrRsALVClWSVJNoc1nVqSO6I+KMiJgbEbMjYkqHPsWzrFAlSbXo8KCk/YDMzEkRsQlwZURMyswlnbqgFaokqRYR7S192BU4GyAzHwDmA9t38vMYqJKkJhoPPNyy/hAwoZMXHHJdvpuOG920+9RDUkRMy8yZdbdDaod/j9dsY0a21+cbEdOAaS2bZrb8fVhAEaoPlusb8q8BW7nIzE6eX0NURMzLzB3rbofUDv8eqzcRsT+wXWZ+IiI2Aq4CdvIeqiRJ/XM+MDYi5gAXA9M7GaYwBLt8JUlqVxme0wfzmlaow5f3ndQE/j3WkOE9VEmSKmCFKklSBQzUYaSOqbikTomIfSLi/ojYu+62SOCgpOFm0KfikjolMy+KiIl1t0PqYYU6vAz6VFySNFwYqMPLoE/FJUnDhYE6vPRMxdWj41NxSdJwYaAOL9cC+wOUU3HtANxcY3skqTEM1OFl0KfikqThwokdJEmqgBWqJEkVMFAlSaqAgSpJUgUMVEmSKm
CgSpJUAQNVWoWIGBURI1rWu1vXe/mZrpbvIyKik22UNDQYqFKLiNiifBvPTyPiTuB9wK8i4pGI+DVwPbBtRHwlIt4aEQdHxIcj4sCImFiG6YyI2KY85ZbA5eW5/ysibo6IGyNifkR8veW6ERE/ioiXl+td5frzB/UPQNKA+RyqtJyI2ANYHxibmWeW264B9szMJ8r1jYHJwMHAacCbgD8BLwcWAl8BlgIvAv43cHRm3ryKax4CHAL8DXgC+B7F24HGAfcCp2bmFRV/VEkVskKVVrQM+BJwbkS8PiKuB14BXBYRF0fEaOAFwDXA/cCHgP8PPA48A/T8lvoB4NvAvwFXRMTaPd3FZUU6puxCHg+8FJgIzAMuAHoCex7wDsNUGvoMVKlFWZ0eSfFv4yfA7hTB+W7gvcBmwEbA6RSvw4PivcLLlj9XZh5PUZ1elZk9AXxjRDwJ3AjcAOycmQsy86PlOe4DNqeoSr8EfCwzn+rEZ5VULV8wLv2rq4CrgdnA7cAfKQL0E8ARAJl5T0S8Bngu8C6Krt3evBvYICI+B+ySmcsi4nZgCtCVmUsiYmx5ngOBTYAHgJ2A9YBdImIZ8JnM/GHFn1VShQxUqUVmLm0ZlXsMRVfvRsBzKF5/1+MtFPc3AcaUX58DtI4I3ofifioU92SPi4j7y/V/p3jbz5EUIfoG4CCKLuQdgLspquQXAU+W2yUNYXb5Sr3IzAcpqs9HKMJzLHBLuXt3igr2XOAxYFPg+xRv9OnRBfSM5D0MeGPP/sz8LrBTREzJzD9m5jTg/cBuwCSKyvbdwKsz89eZ6XtrpSHOQJVWNLbl+98AbwXOBL4F/LYclPQ64A7gBOBU4B7gv4G5wPOBLEPzfiCAw4HrMvOv/LOK/RQwquVaFwD/oBjIdDHwzvK6ktYAdvlKK/oCxYjebSkqyqMz84qI+DZF6M2iqDxfDJybmXOAORHxI+A/KarZnopyHYqu4FOAURFxBvBngMy8seeCEXESRTfy48ASYDSwBfDTiFiUmbt1+DNLapPPoUqDKCLGrGzUbln1LsnMZcttD2C0I32loc9AlSSpAt5DlSSpAgaqJEkVMFAlSaqAgSpJUgX+B9WlcBYNJc54AAAAAElFTkSuQmCC\n"
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"execution_count": 34
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:54.941365Z",
|
||
"start_time": "2025-05-14T06:41:54.936085Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from sklearn.model_selection import GridSearchCV\n",
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score"
|
||
],
|
||
"id": "ba37982a3d38c98a",
|
||
"outputs": [],
|
||
"execution_count": 44
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:57.023385Z",
|
||
"start_time": "2025-05-14T06:41:57.019735Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"scaler = StandardScaler()\n",
|
||
"X_train_pca_scaled = scaler.fit_transform(X_train_pca)\n",
|
||
"X_test_pca_scaled = scaler.transform(X_test_pca)"
|
||
],
|
||
"id": "b3c2d3881eeddf31",
|
||
"outputs": [],
|
||
"execution_count": 45
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:58.292401Z",
|
||
"start_time": "2025-05-14T06:41:58.289737Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"param_grid = {\n",
|
||
" 'C': [0.001, 0.01, 0.1, 1, 10, 100],\n",
|
||
" 'penalty': ['l1', 'l2']\n",
|
||
"}"
|
||
],
|
||
"id": "a70b8e803c245dfd",
|
||
"outputs": [],
|
||
"execution_count": 46
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:59.957375Z",
|
||
"start_time": "2025-05-14T06:41:59.839417Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"model = LogisticRegression(solver='liblinear')\n",
|
||
"grid_search = GridSearchCV(model, param_grid, cv=5, scoring='f1')\n",
|
||
"grid_search.fit(X_train_pca_scaled, y_train)\n"
|
||
],
|
||
"id": "acebcefaaed09b5e",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"GridSearchCV(cv=5, estimator=LogisticRegression(solver='liblinear'),\n",
|
||
" param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],\n",
|
||
" 'penalty': ['l1', 'l2']},\n",
|
||
" scoring='f1')"
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 47
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:09.325733Z",
|
||
"start_time": "2025-05-14T06:42:09.323292Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(\"最优超参数组合:\", grid_search.best_params_)\n",
|
||
"print(\"最优模型的交叉验证平均 F1 值:\", grid_search.best_score_)\n"
|
||
],
|
||
"id": "f473f0e3fe11601b",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"最优超参数组合: {'C': 0.001, 'penalty': 'l1'}\n",
|
||
"最优模型在训练集上的 F1 值: 0.0\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 48
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:23.992660Z",
|
||
"start_time": "2025-05-14T06:42:23.989902Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"best_model = grid_search.best_estimator_\n",
|
||
"y_pred_best = best_model.predict(X_test_pca_scaled)"
|
||
],
|
||
"id": "31341e379c6efb7f",
|
||
"outputs": [],
|
||
"execution_count": 51
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:24.868183Z",
|
||
"start_time": "2025-05-14T06:42:24.860990Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"precision_best = precision_score(y_test, y_pred_best)\n",
|
||
"recall_best = recall_score(y_test, y_pred_best)\n",
|
||
"f1_best = f1_score(y_test, y_pred_best)\n",
|
||
"accuracy_best = accuracy_score(y_test, y_pred_best)"
|
||
],
|
||
"id": "8406ad284d7569a3",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/Volumes/Data/Environment/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 52
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:30.031422Z",
|
||
"start_time": "2025-05-14T06:42:30.028917Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(f\"最优模型在测试集上的准确率: {accuracy_best * 100:.2f}%\")\n",
|
||
"print(f\"最优模型在测试集上的精确率: {precision_best * 100:.2f}%\")\n",
|
||
"print(f\"最优模型在测试集上的召回率: {recall_best * 100:.2f}%\")\n",
|
||
"print(f\"最优模型在测试集上的 F1 值: {f1_best * 100:.2f}%\")"
|
||
],
|
||
"id": "74828d74cc923980",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"最优模型在测试集上的准确率: 89.57%\n",
|
||
"最优模型在测试集上的精确率: 0.00%\n",
|
||
"最优模型在测试集上的召回率: 0.00%\n",
|
||
"最优模型在测试集上的 F1 值: 0.00%\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 53
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "code",
|
||
"outputs": [],
|
||
"execution_count": null,
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"from sklearn.metrics import classification_report"
|
||
],
|
||
"id": "57ef68abfd6fde0b"
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|