牛骨文教育服务平台(让学习变的简单)
博文笔记

Python 学习随笔

创建时间:2016-04-24 投稿人: 浏览次数:1939

pandas.Series.unique() 查找所包含的数值

# Sample frame: column "a" holds repeated values plus one NaN.
a = pd.DataFrame({
    "a": [1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, np.nan],
    "b": [2, 2, 3, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    "c": [3, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
    "d": [4, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5],
})
# Series.unique() lists each distinct value once; NaN is reported as well.
c = a["a"].unique()
# print() call form: the bare print statement is a syntax error on Python 3.
print(c)
---------------------------------
[  1.   2.   3.   4.  nan]

numpy 排序 sorted

# Sample frame: column "a" is unsorted and ends with a NaN.
a = pd.DataFrame({
    "a": [5, 6, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, np.nan],
    "b": [2, 2, 3, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2],
    "c": [3, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
    "d": [4, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5],
})
# Distinct values of column "a", in order of first appearance.
c = a["a"].unique()
print(c)
# The builtin sorted() returns a new ascending list.
# NOTE: every comparison against NaN is False, so sorted() gives a sensible
# order here only because the NaN already sits last; np.sort(c) always
# places NaN at the end.
print(sorted(c))
-------------------------------------------------
[  5.   6.   3.   4.   1.  nan]
[1.0, 3.0, 4.0, 5.0, 6.0, nan]

已知dataframe中a,b的值输出c的值 (loc 补充)

a = pd.DataFrame({
    "a": [5, 6, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 5],
    "b": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
    "c": [3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5],
    "d": [4, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5],
})

# Boolean mask: keep the rows where a == 1 and b == 5 hold together.
d = a.loc[(a["a"] == 1) & (a["b"] == 5)]
# Number of matching rows.
print(len(d))
# Value of column "c" in the first matching row.
print(d.loc[:, "c"].values[0])
----------------------------------
1
4

取整

int()#truncates toward zero (equals rounding down only for non-negative numbers)
round(),#rounds to the nearest value
math.ceil()#rounds up to the next integer

重复列表元素n次

a = [1, 2, 3, 4]
# Emit every element of a n times in a row; n (the repeat count) must be
# defined before this line runs.
b = [value for value in a for _ in range(n)]

取余数

5%2------》1

divmod(5,2)------》(2,1)

统计周期内的和

def tran_14(dataframe, period=14):
    """Sum the rows of *dataframe* over consecutive fixed-length periods.

    Leading rows that do not fill a whole period are dropped, so the periods
    are aligned to the end of the frame.

    Args:
        dataframe: pandas DataFrame to aggregate; it is not modified.
        period: Number of rows per period. Defaults to 14.

    Returns:
        A DataFrame indexed by "index14" (the 0-based period number) holding
        the per-period column sums.

    Raises:
        ValueError: If *period* is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be a positive integer")
    leftover = len(dataframe) % period
    trimmed = dataframe.iloc[leftover:]
    # Row position // period gives 0,0,...,1,1,... i.e. one label per period.
    labels = [pos // period for pos in range(len(trimmed))]
    # assign() returns a new frame, so neither the caller's frame nor a slice
    # of it is written to (no chained-assignment / SettingWithCopy hazard).
    return trimmed.assign(index14=labels).groupby("index14").sum()

转化为时间序列

# Load the CSV, using its first column as the row index.
b=pd.read_csv(w_file2[i],index_col=0)
# Parse the index labels with the year-month-day pattern %Y%m%d.
dateindex=pd.to_datetime(b.index,format="%Y%m%d")
# Install the parsed dates as the frame's index.
b.index=pd.DatetimeIndex(dateindex)

画时间序列

一 出处http://blog.csdn.net/rumswell/article/details/9862089

from matplotlib.dates import AutoDateLocator, DateFormatter
autodates = AutoDateLocator()
yearsFmt = DateFormatter("%Y-%m-%d %H:%M:%S")
figure.autofmt_xdate()        # adjust the appearance of the x-axis date labels
ax.xaxis.set_major_locator(autodates)       # set the spacing of the time ticks
ax.xaxis.set_major_formatter(yearsFmt)      # set the display format of the dates
# NOTE(review): the two calls below are listed without arguments as placeholders.
ax.set_xticks() # set the x-axis tick positions
ax.set_xlim()   # set the x-axis range

from matplotlib.dates import  DateFormatter
# Tick labels are rendered as year-month-day.
yearsFmt = DateFormatter("%Y-%m-%d")
# One tick every 10 days between the two dates.
data_r=pd.date_range("2014/10/10","2015/12/27",freq="10D")
# Plot frame b with those ticks, a grid, and labels rotated by 45 degrees.
b1=b.plot(xticks=data_r,grid=True,rot=45)
# NOTE(review): presumably meant to blank out the legend entries - confirm.
b1.legend("")
b1.xaxis.set_major_formatter(yearsFmt)
plt.grid(True)
# The title is the w_file2[i] entry.
b1.set_title(w_file2[i])

正则re

之前的id有一个大写的X,这里用re的search去掉它

def search_id(data):
    """Return the run of digits that follows the first upper-case "X" in *data*.

    Args:
        data: Identifier string such as "X12345".

    Returns:
        The digits captured after the "X" ("12345" for "X12345"). When *data*
        contains no "X" followed by digits it is returned unchanged instead
        of failing with AttributeError on a missing match.
    """
    found = re.search(r"X([0-9]+)", data)
    if found is None:
        # Nothing to strip: hand the identifier back as it came in.
        return data
    return found.group(1)
# Strip the "X" prefix from every id. The loop variable is not called `id`
# so the builtin id() is neither shadowed nor (on Python 2, where list
# comprehension variables leak) overwritten.
item_id = [search_id(raw_id) for raw_id in item_id]

附:
Python提供了两种不同的原始操作:match和search。match只从字符串的起点开始做匹配,而search(Perl的默认行为)会在字符串的任意位置查找匹配。
prog = re.compile(pattern)
result = prog.match(string)

result = re.match(pattern, string)
是等价的。
. ^ $ * + ? { } [ ] \ | ( ) 是几个比较特殊的字符,含义见以下博客:
http://www.cnblogs.com/huxi/archive/2010/07/04/1771073.html

\d  匹配任何十进制数;它相当于类 [0-9]。

\D  匹配任何非数字字符;它相当于类 [^0-9]。

\s  匹配任何空白字符;它相当于类 [ \t\n\r\f\v]。

\S  匹配任何非空白字符;它相当于类 [^ \t\n\r\f\v]。

\w  匹配任何字母数字字符;它相当于类 [a-zA-Z0-9_]。

\W  匹配任何非字母数字字符;它相当于类 [^a-zA-Z0-9_]。

http://www.runoob.com/python/python-reg-expressions.html(推荐)

#!/usr/bin/python
import re

line = "Cats are smarter than dogs"

# re.match only matches at the beginning of the string; group 1 is the text
# before " are ", group 2 the single word after it (non-greedy).
matchObj = re.match(r"(.*) are (.*?) .*", line, re.M | re.I)

if matchObj:
    print("matchObj.group() : ", matchObj.group())
    print("matchObj.group(1) : ", matchObj.group(1))
    print("matchObj.group(2) : ", matchObj.group(2))
else:
    print("No match!!")
   ----------------------------
matchObj.group() :  Cats are smarter than dogs
matchObj.group(1) :  Cats
matchObj.group(2) :  smarter
import re
# re.search scans the whole string and reports the span of the first match:
# "www" is found at the start, "com" is found away from the start.
for needle in ("www", "com"):
    print(re.search(needle, "www.runoob.com").span())

======================
(0, 3)
(11, 14)
import re

line = "Cats are smarter than dogs"

# re.search looks for the pattern anywhere in the string; the groups are the
# text before " are " and the single word after it.
searchObj = re.search(r"(.*) are (.*?) .*", line, re.M | re.I)

if searchObj:
    print("searchObj.group() : ", searchObj.group())
    print("searchObj.group(1) : ", searchObj.group(1))
    print("searchObj.group(2) : ", searchObj.group(2))
else:
    print("Nothing found!!")
-------------------------------
searchObj.group() :  Cats are smarter than dogs
searchObj.group(1) :  Cats
searchObj.group(2) :  smarter

Series.str.split()

Series.str can be used to access the values of the series as strings and apply several methods to it.
例子在下方

pandas.DataFrame.stack

以level为支点展开
DataFrame.stack(level=-1, dropna=True)

level : int, string, or list of these, default last level
Level(s) to stack, can pass level name
dropna : boolean, default True
Whether to drop rows in the resulting Frame/Series with no valid values

Examples

>>> s
     a   b
one  1.  2.
two  3.  4.
>>> s.stack()
one a    1
    b    2
two a    3
    b    4

pandas.Series.apply

Series.apply(func, convert_dtype=True, args=(), **kwds)
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.apply.html

a = pd.DataFrame({"a": ["1", "w,q,i"], "b": ["2", "o"]}, columns=["b", "a"])
# Split every comma-separated string of column "a" into a list of parts.
parts = a.a.str.split(",")
print(parts)
print("--------------")
# Turn each list into a row of columns; shorter lists are padded with NaN.
# The stray positional 1 was dropped: it landed in convert_dtype, whose
# default is already truthy and which newer pandas deprecates.
expanded = parts.apply(pd.Series)
print(expanded)
print("--------------")
# stack() moves the column labels into an inner index level.
print(expanded.stack())
==============================================
0          [1]
1    [w, q, i]
dtype: object
--------------
   0    1    2
0  1  NaN  NaN
1  w    q    i
--------------
0  0    1
1  0    w
   1    q
   2    i
dtype: object

一列转化为多列 stack reset_index split

a = pd.DataFrame({"a": ["1", "w,q,i"], "b": ["2", "o"]}, columns=["b", "a"])

# One row per original record, one column per comma-separated part, indexed
# by the values of column "b".
b = pd.DataFrame(a.a.str.split(",").tolist(), index=a.b)
print(b)
print("1---------------------")
# Stack the frame built above instead of constructing it a second time.
b = b.stack()
print(b)
print("2---------------------")
b = b.reset_index()[[0, "b"]]  # the stacked values sit in the column labelled 0
print(b)
print("3---------------------")
b.columns = ["a", "b"]  # give the value column its original name
print(b)
=========================================
  0     1     2
b               
2  1  None  None
o  w     q     i
1---------------------
b   
2  0    1
o  0    w
   1    q
   2    i
dtype: object
2---------------------
   0  b
0  1  2
1  w  o
2  q  o
3  i  o
3---------------------
   a  b
0  1  2
1  w  o
2  q  o
3  i  o

一列转化为多列 pd.Series() concat() iterrows()

a = pd.DataFrame({"a": ["1", "w,q,i"], "b": ["2", "o"]}, columns=["b", "a"])

# For every row build a Series indexed by the split parts of "a" whose value
# is that row's "b", then concatenate the pieces and flatten the index.
b = pd.concat([pd.Series(row["b"], row["a"].split(","))
               for _, row in a.iterrows()]).reset_index()

print(b)
print([row["b"] for _, row in a.iterrows()])
print([row["a"].split(",") for _, row in a.iterrows()])
# A scalar value is repeated for every index label. The earlier form (a
# one-element data list against a nested index list) depended on lenient
# length handling that newer pandas versions may reject.
print(pd.Series("o", ["w", "q", "i"]))
===============================

  index  0
0     1  2
1     w  o
2     q  o
3     i  o

["2", "o"]

[["1"], ["w", "q", "i"]]


w    o
q    o
i    o
dtype: object

pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)

concat()

# Two identical Series: the scalar "o" repeated for the labels w, q, i.
# (Scalar data is used instead of a one-element list against a nested index
# list, which newer pandas versions may reject as a length mismatch.)
c = pd.Series("o", ["w", "q", "i"])
d = pd.Series("o", ["w", "q", "i"])
# concat() appends d below c and keeps the original labels.
print(pd.concat([c, d]))
========================
w    o
q    o
i    o
w    o
q    o
i    o
dtype: object

转化pandas 日期格式

如将‘2015/01/01’转化为‘20150101’

a = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7],
                  "b": [2, 2, 3, 4, 8, 6, 7],
                  "c": [3, 2, 3, 4, 5, 6, 7]})

# Seven consecutive days starting at 2015-09-01.
dates = pd.date_range("20150901", periods=7)
# strftime renders every timestamp as text such as "20150901".
date2 = dates.strftime("%Y%m%d")
a.index = date2
print(a)
=====================
         a  b  c
20150901  1  2  3
20150902  2  2  2
20150903  3  3  3
20150904  4  4  4
20150905  5  8  5
20150906  6  6  6
20150907  7  7  7
from PIL import Image
# Reference image: only its width and height are read here.
img = Image.open(img_path)
(imgw, imgh) = img.size
for name in files:
    # Join directory and file name instead of concatenating the two strings,
    # so maskpath works with or without a trailing separator.
    fullname = os.path.join(maskpath, name)
    mask_png = cv2.imread(fullname, cv2.IMREAD_GRAYSCALE)
    # The annotation lines refer to the .jpg that belongs to this mask file.
    image_name = name[0:-3] + "jpg"
    #x,y=np.shape(mask_png)
    for object_i in idx:
        # Binary mask: 255 where the pixel carries this object index, else 0.
        img = mask_png.copy()
        img[img != object_i] = 0
        img[img == object_i] = 255
        if img.sum() <= 2550:  # skip insignificant objects (at most 10 pixels)
            continue
        object_name = BB["Name"][BB["Idx"] == object_i].values[0]
        object_name = object_name.replace(" ", "")
        print(object_name, object_i)
        img2 = img.copy()
        # [-2] selects the contour list whether findContours returns two
        # values or three (as it does in OpenCV 3.x).
        contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        for contours_i, contour in enumerate(contours, 1):
            x, y, w, h = cv2.boundingRect(contour)
            print(image_name, object_i, contours_i, x, y, w, h)
            if w * h > 16:
                # One line per box: image, class name, then x1 y1 x2 y2.
                fields = [image_name, object_name, str(x), str(y), str(x + w), str(y + h)]
                output.write(" ".join(fields) + "\n")
            #cv2.rectangle(img2,(x,y),(x+w,y+h),255,1)
            #cv2.imshow(name, img2)
        #cv2.waitKey(10000)
        #cv2.destroyAllWindows()
# Close the output file once, after every mask file has been processed.
output.close()

# Remove every space from the values of column "Name" (typographic quotes
# replaced by plain ASCII quotes so the line parses).
a["Name"] = a["Name"].apply(lambda x: x.replace(" ", ""))

In [57]: np.minimum([1,2,3,4],[3])
Out[57]: array([1, 2, 3, 3])

In [58]: np.maximum(np.minimum([1,2,3,4],[3]),0)
Out[58]: array([1, 2, 3, 3])

In [59]: np.maximum(np.minimum([1,2,3,4],[3]),2)
Out[59]: array([2, 2, 3, 3])
  # 4. sort all (proposal, score) pairs by score from highest to lowest
 # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
color=clsname.index(single_img.iloc[j,1])############
# Ways to swap every "a" in the list a for another value.
[(s,10)[s=="a"] for s in a]# indexes the pair (s,10) with the comparison result: False picks s, True picks 10
# NOTE(review): "is" tests object identity, not equality, so this variant is
# unreliable for strings; prefer the == form above.
[(s,10)[s is "a"] for s in a]
# Conditional-expression form; unlike the two above it substitutes the
# string "10" and rebinds a to the new list.
a = ["10" if s == "a" else s for s in a]
声明:该文观点仅代表作者本人,牛骨文系教育信息发布平台,牛骨文仅提供信息存储空间服务。