Pandas_basics
Pandas Basics
Copyright By PowCoder代写 加微信 powcoder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Series of five standard-normal draws, labelled 'a'..'e' instead of the
# default integer positions.
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
a 0.151601
b 0.338190
c 1.121875
d 0.213966
e 0.353676
dtype: float64
# No index supplied, so the Series is labelled with integer positions 0..4.
s1 = pd.Series(data=np.random.randn(5))
0 1.710192
1 -0.418625
2 -0.108972
3 -0.652015
4 -0.916771
dtype: float64
a 0.151601
b 0.338190
c 1.121875
d 0.213966
e 0.353676
dtype: float64
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
# Anonymous Series of five random draws; with no index argument the labels
# are the integer positions 0..4 (see the output below).
pd.Series(np.random.randn(5))
0 1.131017
1 -1.552188
2 -0.021066
3 -1.439644
4 -1.196829
dtype: float64
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
d = {'a': 0., 'b': 1., 'c': 2.}
pd.Series(d)
dtype: float64
# An explicit index selects and orders the dict entries by label; a label
# that is not a key of d ('d' here) has no value to take and becomes NaN.
pd.Series(d, index=['b', 'c', 'd', 'a'])
dtype: float64
# A scalar value is repeated for every label in the given index.
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])
dtype: float64
0.15160063640702348
a 0.151601
b 0.338190
c 1.121875
dtype: float64
0.15160063640702348
# Dict-style assignment by label: overwrites the value stored under 'e'
# in place.
s['e'] = 12.
a 0.151601
b 0.338190
c 1.121875
d 0.213966
e 12.000000
dtype: float64
# Dict-style lookup by label; returns the value stored under 'a'.
s.get('a')
0.15160063640702348
# Two independent Series of five random draws each, both on the default
# integer index; ts1 is drawn first, then ts2.
ts1, ts2 = (pd.Series(np.random.randn(5)) for _ in range(2))
0 0.283003
1 -1.598701
2 1.213011
3 -0.258805
4 -0.469464
dtype: float64
0 0.689568
1 0.955640
2 0.326964
3 0.008426
4 -0.036922
dtype: float64
# Map column names to the Series that will supply each column's data.
d = {'col1': ts1, 'col2': ts2}
{'col1': 0 0.283003
1 -1.598701
2 1.213011
3 -0.258805
4 -0.469464
dtype: float64,
'col2': 0 0.689568
1 0.955640
2 0.326964
3 0.008426
4 -0.036922
dtype: float64}
# Build a DataFrame from the dict d: each key becomes a column name and
# each value supplies that column's data.
df1 = pd.DataFrame(d)
0 0.283003 0.689568
1 -1.598701 0.955640
2 1.213011 0.326964
3 -0.258805 0.008426
4 -0.469464 -0.036922
# 10x5 frame of random draws; rows and columns both get default integer
# labels because neither index nor columns is given.
df2 = pd.DataFrame(data=np.random.randn(10, 5))
0 0.470767 -0.595539 0.353596 -2.623139 -0.399702
1 -1.168683 0.222482 1.137190 -2.619626 -0.360753
2 -1.706493 0.082381 1.313694 0.388377 -1.088677
3 1.352092 0.313504 1.277715 0.471739 -0.835905
4 1.178728 1.109365 0.604487 -0.028254 0.641070
5 -1.242005 1.350884 0.235332 0.063167 1.082531
6 0.590282 2.253916 -0.716329 -0.398213 0.663291
7 -1.547727 1.220111 0.397296 -0.373423 -1.331188
8 -0.887249 0.046792 -0.489288 -0.770825 -0.001106
9 1.104887 0.443361 0.622741 0.613117 0.024925
# 10x5 frame of random draws with named columns 'a'..'e'; the rows keep
# the default integer index.
df3 = pd.DataFrame(np.random.randn(10, 5), columns=['a', 'b', 'c', 'd', 'e'])
0 0.276001 0.791602 1.517536 -0.585271 -1.257917
1 1.144520 1.035529 0.880881 1.540087 -1.727272
2 -0.050916 0.582198 -0.667021 0.465296 0.554556
3 -0.504030 -0.794967 0.377888 -0.975457 1.087849
4 0.532643 0.290046 -0.135843 -0.608255 -0.163569
5 0.555990 -1.395719 1.283941 -1.597420 1.450402
6 -0.222846 0.845612 0.831084 -1.090636 -0.419451
7 0.749854 -0.360554 0.318489 -0.594535 -1.679092
8 0.367934 -0.919072 0.352977 0.922358 1.583149
9 0.427854 0.179948 0.197143 1.128120 1.877090
# Two Series with different label sets. The resulting frame's index is the
# union of the labels ('a'..'d'); 'one' has no entry for 'd', so that cell
# is NaN.
d = {
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(d)
array([[ 1., 1.],
[ 2., 2.],
[ 3., 3.],
[nan, 4.]])
# An explicit index keeps only the listed row labels, in the order given.
pd.DataFrame(d, index=['d', 'b', 'a'])
Index(['a', 'b', 'c', 'd'], dtype='object')
# Column labels of df, returned as an Index object.
df.columns
Index(['one', 'two'], dtype='object')
# Whether any of df's row labels is NaN (this checks the index itself, not
# the cell values).
df.index.hasnans
# Load the long-format sample data from the working directory; the output
# below shows the columns date, variable and value.
dfc = pd.read_csv('./data1.csv')
date variable value
0 2000-01-03 A 0.469112
1 2000-01-04 A -0.282863
2 2000-01-05 A -1.509059
3 2000-01-03 B -1.135632
4 2000-01-04 B 1.212112
5 2000-01-05 B -0.173215
6 2000-01-03 C 0.119209
7 2000-01-04 C -1.044236
8 2000-01-05 C -0.861849
9 2000-01-03 D -2.104569
10 2000-01-04 D -0.494929
11 2000-01-05 D 1.071804
# Element-wise comparison: yields a boolean Series that is True on rows
# whose 'variable' equals 'A'.
dfc['variable'] == 'A'
0 True
1 True
2 True
3 False
4 False
5 False
6 False
7 False
8 False
9 False
10 False
11 False
Name: variable, dtype: bool
# Boolean-mask indexing: keep only the rows where the mask is True, i.e.
# the rows whose 'variable' equals 'A'.
dfc[dfc['variable'] == 'A']
date variable value
0 2000-01-03 A 0.469112
1 2000-01-04 A -0.282863
2 2000-01-05 A -1.509059
# Reshape long to wide: one row per date, one column per distinct
# 'variable', with cells filled from 'value'.
dfc.pivot(index='date', columns='variable', values='value')
variable A B C D
2000-01-03 0.469112 -1.135632 0.119209 -2.104569
2000-01-04 -0.282863 1.212112 -1.044236 -0.494929
2000-01-05 -1.509059 -0.173215 -0.861849 1.071804
# Summary statistics (count, mean, std, min, quartiles, max) for dfc, as
# shown in the output below.
dfc.describe()
count 12.000000
mean -0.394510
std 1.007649
min -2.104569
25% -1.067085
50% -0.388896
75% 0.206685
max 1.212112
程序代写 CS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com