1143 lines
43 KiB
Plaintext
1143 lines
43 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:25:11.758935Z",
|
||
"start_time": "2025-05-14T06:25:11.434128Z"
|
||
}
|
||
},
|
||
"source": "import pandas as pd",
|
||
"outputs": [],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:25:41.010808Z",
|
||
"start_time": "2025-05-14T06:25:40.997687Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"data = pd.read_csv('./data/shill_bidding.csv',encoding='gbk')\n",
|
||
"data"
|
||
],
|
||
"id": "be73a1ac2c569ba7",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 记录ID 拍卖ID 竞标者倾向 竞标比率 连续竞标 上次竞标 竞标量 拍卖起拍 \\\n",
|
||
"0 1 732 0.200000 0.400000 0.0 0.000028 0.000000 0.993593 \n",
|
||
"1 2 732 0.024390 0.200000 0.0 0.013123 0.000000 0.993593 \n",
|
||
"2 3 732 0.142857 0.200000 0.0 0.003042 0.000000 0.993593 \n",
|
||
"3 4 732 0.100000 0.200000 0.0 0.097477 0.000000 0.993593 \n",
|
||
"4 5 900 0.051282 0.222222 0.0 0.001318 0.000000 0.000000 \n",
|
||
"... ... ... ... ... ... ... ... ... \n",
|
||
"6316 15129 760 0.333333 0.160000 1.0 0.738557 0.280000 0.993593 \n",
|
||
"6317 15137 2481 0.030612 0.130435 0.0 0.005754 0.217391 0.993593 \n",
|
||
"6318 15138 2481 0.055556 0.043478 0.0 0.015663 0.217391 0.993593 \n",
|
||
"6319 15139 2481 0.076923 0.086957 0.0 0.068694 0.217391 0.993593 \n",
|
||
"6320 15144 2481 0.016393 0.043478 0.0 0.340351 0.217391 0.993593 \n",
|
||
"\n",
|
||
" 早期竞标 胜率 拍卖持续时间(小时) 类别 \n",
|
||
"0 0.000028 0.666667 5 0 \n",
|
||
"1 0.013123 0.944444 5 0 \n",
|
||
"2 0.003042 1.000000 5 0 \n",
|
||
"3 0.097477 1.000000 5 0 \n",
|
||
"4 0.001242 0.500000 7 0 \n",
|
||
"... ... ... ... .. \n",
|
||
"6316 0.686358 0.888889 3 1 \n",
|
||
"6317 0.000010 0.878788 7 0 \n",
|
||
"6318 0.015663 0.000000 7 0 \n",
|
||
"6319 0.000415 0.000000 7 0 \n",
|
||
"6320 0.340351 0.000000 7 0 \n",
|
||
"\n",
|
||
"[6321 rows x 12 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>记录ID</th>\n",
|
||
" <th>拍卖ID</th>\n",
|
||
" <th>竞标者倾向</th>\n",
|
||
" <th>竞标比率</th>\n",
|
||
" <th>连续竞标</th>\n",
|
||
" <th>上次竞标</th>\n",
|
||
" <th>竞标量</th>\n",
|
||
" <th>拍卖起拍</th>\n",
|
||
" <th>早期竞标</th>\n",
|
||
" <th>胜率</th>\n",
|
||
" <th>拍卖持续时间(小时)</th>\n",
|
||
" <th>类别</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.400000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.024390</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.944444</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.142857</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.100000</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>900</td>\n",
|
||
" <td>0.051282</td>\n",
|
||
" <td>0.222222</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.001318</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.001242</td>\n",
|
||
" <td>0.500000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6316</th>\n",
|
||
" <td>15129</td>\n",
|
||
" <td>760</td>\n",
|
||
" <td>0.333333</td>\n",
|
||
" <td>0.160000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.738557</td>\n",
|
||
" <td>0.280000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.686358</td>\n",
|
||
" <td>0.888889</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6317</th>\n",
|
||
" <td>15137</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.030612</td>\n",
|
||
" <td>0.130435</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.005754</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000010</td>\n",
|
||
" <td>0.878788</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6318</th>\n",
|
||
" <td>15138</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.055556</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6319</th>\n",
|
||
" <td>15139</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.076923</td>\n",
|
||
" <td>0.086957</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.068694</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000415</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6320</th>\n",
|
||
" <td>15144</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.016393</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6321 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 5
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:41.114487Z",
|
||
"start_time": "2025-05-14T06:28:41.087602Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"X = data.iloc[:, :-1] # 特征数据\n",
|
||
"X"
|
||
],
|
||
"id": "9c57d5b64979660f",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 记录ID 拍卖ID 竞标者倾向 竞标比率 连续竞标 上次竞标 竞标量 拍卖起拍 \\\n",
|
||
"0 1 732 0.200000 0.400000 0.0 0.000028 0.000000 0.993593 \n",
|
||
"1 2 732 0.024390 0.200000 0.0 0.013123 0.000000 0.993593 \n",
|
||
"2 3 732 0.142857 0.200000 0.0 0.003042 0.000000 0.993593 \n",
|
||
"3 4 732 0.100000 0.200000 0.0 0.097477 0.000000 0.993593 \n",
|
||
"4 5 900 0.051282 0.222222 0.0 0.001318 0.000000 0.000000 \n",
|
||
"... ... ... ... ... ... ... ... ... \n",
|
||
"6316 15129 760 0.333333 0.160000 1.0 0.738557 0.280000 0.993593 \n",
|
||
"6317 15137 2481 0.030612 0.130435 0.0 0.005754 0.217391 0.993593 \n",
|
||
"6318 15138 2481 0.055556 0.043478 0.0 0.015663 0.217391 0.993593 \n",
|
||
"6319 15139 2481 0.076923 0.086957 0.0 0.068694 0.217391 0.993593 \n",
|
||
"6320 15144 2481 0.016393 0.043478 0.0 0.340351 0.217391 0.993593 \n",
|
||
"\n",
|
||
" 早期竞标 胜率 拍卖持续时间(小时) \n",
|
||
"0 0.000028 0.666667 5 \n",
|
||
"1 0.013123 0.944444 5 \n",
|
||
"2 0.003042 1.000000 5 \n",
|
||
"3 0.097477 1.000000 5 \n",
|
||
"4 0.001242 0.500000 7 \n",
|
||
"... ... ... ... \n",
|
||
"6316 0.686358 0.888889 3 \n",
|
||
"6317 0.000010 0.878788 7 \n",
|
||
"6318 0.015663 0.000000 7 \n",
|
||
"6319 0.000415 0.000000 7 \n",
|
||
"6320 0.340351 0.000000 7 \n",
|
||
"\n",
|
||
"[6321 rows x 11 columns]"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>记录ID</th>\n",
|
||
" <th>拍卖ID</th>\n",
|
||
" <th>竞标者倾向</th>\n",
|
||
" <th>竞标比率</th>\n",
|
||
" <th>连续竞标</th>\n",
|
||
" <th>上次竞标</th>\n",
|
||
" <th>竞标量</th>\n",
|
||
" <th>拍卖起拍</th>\n",
|
||
" <th>早期竞标</th>\n",
|
||
" <th>胜率</th>\n",
|
||
" <th>拍卖持续时间(小时)</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.400000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000028</td>\n",
|
||
" <td>0.666667</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.024390</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.013123</td>\n",
|
||
" <td>0.944444</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.142857</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.003042</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>732</td>\n",
|
||
" <td>0.100000</td>\n",
|
||
" <td>0.200000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.097477</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>900</td>\n",
|
||
" <td>0.051282</td>\n",
|
||
" <td>0.222222</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.001318</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.001242</td>\n",
|
||
" <td>0.500000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6316</th>\n",
|
||
" <td>15129</td>\n",
|
||
" <td>760</td>\n",
|
||
" <td>0.333333</td>\n",
|
||
" <td>0.160000</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.738557</td>\n",
|
||
" <td>0.280000</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.686358</td>\n",
|
||
" <td>0.888889</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6317</th>\n",
|
||
" <td>15137</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.030612</td>\n",
|
||
" <td>0.130435</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.005754</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000010</td>\n",
|
||
" <td>0.878788</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6318</th>\n",
|
||
" <td>15138</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.055556</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.015663</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6319</th>\n",
|
||
" <td>15139</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.076923</td>\n",
|
||
" <td>0.086957</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.068694</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.000415</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6320</th>\n",
|
||
" <td>15144</td>\n",
|
||
" <td>2481</td>\n",
|
||
" <td>0.016393</td>\n",
|
||
" <td>0.043478</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.217391</td>\n",
|
||
" <td>0.993593</td>\n",
|
||
" <td>0.340351</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6321 rows × 11 columns</p>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 9
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:43.271751Z",
|
||
"start_time": "2025-05-14T06:28:43.268190Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"y = data.iloc[:, -1] # 标签数据\n",
|
||
"y"
|
||
],
|
||
"id": "cd9249ade901dbc2",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0 0\n",
|
||
"1 0\n",
|
||
"2 0\n",
|
||
"3 0\n",
|
||
"4 0\n",
|
||
" ..\n",
|
||
"6316 1\n",
|
||
"6317 0\n",
|
||
"6318 0\n",
|
||
"6319 0\n",
|
||
"6320 0\n",
|
||
"Name: 类别, Length: 6321, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 10
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:45.332753Z",
|
||
"start_time": "2025-05-14T06:28:45.330224Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "from sklearn.model_selection import train_test_split",
|
||
"id": "8dc41c605cd3157e",
|
||
"outputs": [],
|
||
"execution_count": 11
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:28:56.522996Z",
|
||
"start_time": "2025-05-14T06:28:56.503381Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)",
|
||
"id": "9f85e76994ed4850",
|
||
"outputs": [],
|
||
"execution_count": 13
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:29:39.357247Z",
|
||
"start_time": "2025-05-14T06:29:39.346520Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(\"训练集特征数量:\", len(X_train))\n",
|
||
"print(\"测试集特征数量:\", len(X_test))\n",
|
||
"print(\"训练集标签数量:\", len(y_train))\n",
|
||
"print(\"测试集标签数量:\", len(y_test))"
|
||
],
|
||
"id": "9e7a486068bba773",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"训练集特征数量: 5056\n",
|
||
"测试集特征数量: 1265\n",
|
||
"训练集标签数量: 5056\n",
|
||
"测试集标签数量: 1265\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 14
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:32:48.037308Z",
|
||
"start_time": "2025-05-14T06:32:48.032154Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "from sklearn.decomposition import PCA",
|
||
"id": "310a87e99029912f",
|
||
"outputs": [],
|
||
"execution_count": 16
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:16.933711Z",
|
||
"start_time": "2025-05-14T06:33:16.923897Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "pca = PCA(n_components=0.999)",
|
||
"id": "b2a983bb0cafe05e",
|
||
"outputs": [],
|
||
"execution_count": 17
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:18.638792Z",
|
||
"start_time": "2025-05-14T06:33:18.615773Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "X_train_pca = pca.fit_transform(X_train)",
|
||
"id": "1e4efd6f383f884d",
|
||
"outputs": [],
|
||
"execution_count": 18
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:19.872466Z",
|
||
"start_time": "2025-05-14T06:33:19.869305Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "X_test_pca = pca.transform(X_test)",
|
||
"id": "3726f7c7adee3b78",
|
||
"outputs": [],
|
||
"execution_count": 19
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:33:29.743054Z",
|
||
"start_time": "2025-05-14T06:33:29.740149Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(\"降维后训练集大小:\", X_train_pca.shape)\n",
|
||
"print(\"降维后测试集大小:\", X_test_pca.shape)"
|
||
],
|
||
"id": "3a492d1ae08c79d7",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"降维后训练集大小: (5056, 2)\n",
|
||
"降维后测试集大小: (1265, 2)\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 20
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:33.158908Z",
|
||
"start_time": "2025-05-14T06:34:33.156943Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.metrics import accuracy_score"
|
||
],
|
||
"id": "90b993d18fc3c8fd",
|
||
"outputs": [],
|
||
"execution_count": 22
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:34.787786Z",
|
||
"start_time": "2025-05-14T06:34:34.708147Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"model = LogisticRegression()\n",
|
||
"model.fit(X_train_pca, y_train)"
|
||
],
|
||
"id": "555998c0a33ff564",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"LogisticRegression()"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 23
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:44.692219Z",
|
||
"start_time": "2025-05-14T06:34:44.689245Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"y_pred = model.predict(X_test_pca)\n",
|
||
"accuracy = accuracy_score(y_test, y_pred)"
|
||
],
|
||
"id": "550e99b2717dd70a",
|
||
"outputs": [],
|
||
"execution_count": 24
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:34:48.357752Z",
|
||
"start_time": "2025-05-14T06:34:48.355434Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "print(f\"模型在测试集上的准确率: {accuracy * 100:.2f}%\")",
|
||
"id": "d371ece25527b933",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"模型在测试集上的准确率: 89.57%\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 25
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:35:35.499762Z",
|
||
"start_time": "2025-05-14T06:35:34.902637Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix\n",
|
||
"import seaborn as sns\n",
|
||
"import matplotlib.pyplot as plt"
|
||
],
|
||
"id": "30954934fe9605f0",
|
||
"outputs": [],
|
||
"execution_count": 26
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:35:44.931054Z",
|
||
"start_time": "2025-05-14T06:35:44.926161Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "precision = precision_score(y_test, y_pred)",
|
||
"id": "b1e217f2b2899fd5",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/Volumes/Data/Environment/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 27
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:35:53.539971Z",
|
||
"start_time": "2025-05-14T06:35:53.536968Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "recall = recall_score(y_test, y_pred)",
|
||
"id": "d319c92518dfa777",
|
||
"outputs": [],
|
||
"execution_count": 28
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:36:02.345297Z",
|
||
"start_time": "2025-05-14T06:36:02.342210Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "f1 = f1_score(y_test, y_pred)",
|
||
"id": "a99ded51edcc3c2a",
|
||
"outputs": [],
|
||
"execution_count": 29
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:36:11.818015Z",
|
||
"start_time": "2025-05-14T06:36:11.815129Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(f\"精确率: {precision * 100:.2f}%\")\n",
|
||
"print(f\"召回率: {recall * 100:.2f}%\")\n",
|
||
"print(f\"F1 值: {f1 * 100:.2f}%\")"
|
||
],
|
||
"id": "eeeb022d06ac0cdd",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"精确率: 0.00%\n",
|
||
"召回率: 0.00%\n",
|
||
"F1 值: 0.00%\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 30
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:36:20.417042Z",
|
||
"start_time": "2025-05-14T06:36:20.411479Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "cm = confusion_matrix(y_test, y_pred)",
|
||
"id": "2e7c7fcd91a7da1b",
|
||
"outputs": [],
|
||
"execution_count": 31
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:37:03.532720Z",
|
||
"start_time": "2025-05-14T06:37:03.505902Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from matplotlib import font_manager as fm\n",
|
||
"import matplotlib as mpl\n",
|
||
"\n",
|
||
"font_path = '/System/Library/Fonts/STHeiti Medium.ttc'\n",
|
||
"my_font = fm.FontProperties(fname=font_path)\n",
|
||
"mpl.rcParams['font.family'] = my_font.get_name()\n",
|
||
"mpl.rcParams['axes.unicode_minus'] = False"
|
||
],
|
||
"id": "d6a1fd6bb39c2e86",
|
||
"outputs": [],
|
||
"execution_count": 33
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:37:05.154764Z",
|
||
"start_time": "2025-05-14T06:37:05.074128Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"plt.figure(figsize=(8, 6))\n",
|
||
"sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')\n",
|
||
"# annot=True 表示在热力图上显示具体数值,fmt='d' 表示以整数形式显示\n",
|
||
"plt.xlabel('预测标签')\n",
|
||
"plt.ylabel('真实标签')\n",
|
||
"plt.title('混淆矩阵')\n",
|
||
"plt.show()"
|
||
],
|
||
"id": "8fd3248dde46e851",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<Figure size 576x432 with 2 Axes>"
|
||
],
|
||
"image/png": "\n"
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"execution_count": 34
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:54.941365Z",
|
||
"start_time": "2025-05-14T06:41:54.936085Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from sklearn.model_selection import GridSearchCV\n",
|
||
"from sklearn.preprocessing import StandardScaler\n",
|
||
"from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score"
|
||
],
|
||
"id": "ba37982a3d38c98a",
|
||
"outputs": [],
|
||
"execution_count": 44
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:57.023385Z",
|
||
"start_time": "2025-05-14T06:41:57.019735Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"scaler = StandardScaler()\n",
|
||
"X_train_pca_scaled = scaler.fit_transform(X_train_pca)\n",
|
||
"X_test_pca_scaled = scaler.transform(X_test_pca)"
|
||
],
|
||
"id": "b3c2d3881eeddf31",
|
||
"outputs": [],
|
||
"execution_count": 45
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:58.292401Z",
|
||
"start_time": "2025-05-14T06:41:58.289737Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"param_grid = {\n",
|
||
" 'C': [0.001, 0.01, 0.1, 1, 10, 100],\n",
|
||
" 'penalty': ['l1', 'l2']\n",
|
||
"}"
|
||
],
|
||
"id": "a70b8e803c245dfd",
|
||
"outputs": [],
|
||
"execution_count": 46
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:41:59.957375Z",
|
||
"start_time": "2025-05-14T06:41:59.839417Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"model = LogisticRegression(solver='liblinear')\n",
|
||
"grid_search = GridSearchCV(model, param_grid, cv=5, scoring='f1')\n",
|
||
"grid_search.fit(X_train_pca_scaled, y_train)\n"
|
||
],
|
||
"id": "acebcefaaed09b5e",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"GridSearchCV(cv=5, estimator=LogisticRegression(solver='liblinear'),\n",
|
||
" param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],\n",
|
||
" 'penalty': ['l1', 'l2']},\n",
|
||
" scoring='f1')"
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 47
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:09.325733Z",
|
||
"start_time": "2025-05-14T06:42:09.323292Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(\"最优超参数组合:\", grid_search.best_params_)\n",
|
||
"print(\"最优模型在训练集上的 F1 值:\", grid_search.best_score_)\n"
|
||
],
|
||
"id": "f473f0e3fe11601b",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"最优超参数组合: {'C': 0.001, 'penalty': 'l1'}\n",
|
||
"最优模型在训练集上的 F1 值: 0.0\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 48
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:23.992660Z",
|
||
"start_time": "2025-05-14T06:42:23.989902Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"best_model = grid_search.best_estimator_\n",
|
||
"y_pred_best = best_model.predict(X_test_pca_scaled)"
|
||
],
|
||
"id": "31341e379c6efb7f",
|
||
"outputs": [],
|
||
"execution_count": 51
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:24.868183Z",
|
||
"start_time": "2025-05-14T06:42:24.860990Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"precision_best = precision_score(y_test, y_pred_best)\n",
|
||
"recall_best = recall_score(y_test, y_pred_best)\n",
|
||
"f1_best = f1_score(y_test, y_pred_best)\n",
|
||
"accuracy_best = accuracy_score(y_test, y_pred_best)"
|
||
],
|
||
"id": "8406ad284d7569a3",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/Volumes/Data/Environment/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||
" _warn_prf(average, modifier, msg_start, len(result))\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 52
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-14T06:42:30.031422Z",
|
||
"start_time": "2025-05-14T06:42:30.028917Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"print(f\"最优模型在测试集上的准确率: {accuracy_best * 100:.2f}%\")\n",
|
||
"print(f\"最优模型在测试集上的精确率: {precision_best * 100:.2f}%\")\n",
|
||
"print(f\"最优模型在测试集上的召回率: {recall_best * 100:.2f}%\")\n",
|
||
"print(f\"最优模型在测试集上的 F1 值: {f1_best * 100:.2f}%\")"
|
||
],
|
||
"id": "74828d74cc923980",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"最优模型在测试集上的准确率: 89.57%\n",
|
||
"最优模型在测试集上的精确率: 0.00%\n",
|
||
"最优模型在测试集上的召回率: 0.00%\n",
|
||
"最优模型在测试集上的 F1 值: 0.00%\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 53
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "code",
|
||
"outputs": [],
|
||
"execution_count": null,
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"from sklearn.metrics import classification_report"
|
||
],
|
||
"id": "57ef68abfd6fde0b"
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|