{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('table.csv',index_col='ID')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" School | \n",
" Class | \n",
" Gender | \n",
" Address | \n",
" Height | \n",
" Weight | \n",
" Math | \n",
" Physics | \n",
"
\n",
" \n",
" ID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1101 | \n",
" S_1 | \n",
" C_1 | \n",
" M | \n",
" street_1 | \n",
" 173 | \n",
" 63 | \n",
" 34.0 | \n",
" A+ | \n",
"
\n",
" \n",
" 1102 | \n",
" S_1 | \n",
" C_1 | \n",
" F | \n",
" street_2 | \n",
" 192 | \n",
" 73 | \n",
" 32.5 | \n",
" B+ | \n",
"
\n",
" \n",
" 1103 | \n",
" S_1 | \n",
" C_1 | \n",
" M | \n",
" street_2 | \n",
" 186 | \n",
" 82 | \n",
" 87.2 | \n",
" B+ | \n",
"
\n",
" \n",
" 1104 | \n",
" S_1 | \n",
" C_1 | \n",
" F | \n",
" street_2 | \n",
" 167 | \n",
" 81 | \n",
" 80.4 | \n",
" B- | \n",
"
\n",
" \n",
" 1105 | \n",
" S_1 | \n",
" C_1 | \n",
" F | \n",
" street_4 | \n",
" 159 | \n",
" 64 | \n",
" 84.8 | \n",
" B+ | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" School Class Gender Address Height Weight Math Physics\n",
"ID \n",
"1101 S_1 C_1 M street_1 173 63 34.0 A+\n",
"1102 S_1 C_1 F street_2 192 73 32.5 B+\n",
"1103 S_1 C_1 M street_2 186 82 87.2 B+\n",
"1104 S_1 C_1 F street_2 167 81 80.4 B-\n",
"1105 S_1 C_1 F street_4 159 64 84.8 B+"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 思考题"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. 将物理成绩('Physics')从 object 类型转化为类别类型 Categorical,并且定义其顺序,列出每个分数(按降序排列)有多少学生,以及每个物理分数对应的同学的数学成绩的平均值。"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"data['Physics'] = pd.Categorical(data['Physics'],categories=['C','B-','B','B+','A-','A','A+'], ordered=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Physics\n",
"A+ 3\n",
"A 4\n",
"A- 3\n",
"B+ 9\n",
"B 8\n",
"B- 6\n",
"C 2\n",
"dtype: int64"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.value_counts('Physics').sort_index(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Physics\n",
"C 74.600000\n",
"B- 62.283333\n",
"B 52.612500\n",
"B+ 62.144444\n",
"A- 93.400000\n",
"A 49.350000\n",
"A+ 55.533333\n",
"Name: Math, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby('Physics')['Math'].mean().sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. 将数学成绩('Math')按照分数段 0-40,40-50,50-60,60-70,70-80,80-90,90-100 (左开右闭区间) 转换成 'A+','A','A-','B+','B','B-','C' 的类别数据。列出每个分数(按升序排列)有多少学生。找出数学和物理都得 A 的学生。"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"bins = [0,40,50,60,70,80,90,100]\n",
"cuts = pd.cut(data['Math'],bins=bins,labels=['C','B-','B','B+','A-','A','A+'])\n",
"data['Math'] = pd.Categorical(cuts,categories=['C','B-','B','B+','A-','A','A+'], ordered=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Math\n",
"C 7\n",
"B- 6\n",
"B 4\n",
"B+ 6\n",
"A- 3\n",
"A 7\n",
"A+ 2\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.value_counts('Math').sort_index(ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" School | \n",
" Class | \n",
" Gender | \n",
" Address | \n",
" Height | \n",
" Weight | \n",
" Math | \n",
" Physics | \n",
"
\n",
" \n",
" ID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1304 | \n",
" S_1 | \n",
" C_3 | \n",
" M | \n",
" street_2 | \n",
" 195 | \n",
" 70 | \n",
" A | \n",
" A | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" School Class Gender Address Height Weight Math Physics\n",
"ID \n",
"1304 S_1 C_3 M street_2 195 70 A A"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[(data.Math=='A') & (data.Physics=='A')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}