如何在Python中将多个if/elif判断的循环转换为list comprehension（列表推导式）

发布时间：2020-12-20 12:35:18 所属栏目：Python 来源：网络整理

导读：我正在使用下面的if/elif循环,为大约100万条观测记录创建半小时时间桶,这需要花费很多时间.以下是我的循环 def half_hourly_buckets(dataframe,time_column): dataframe[time_column] = pd.to_datetime(dataframe[time_column],format = '%H:%M:%S').dt.time for ...

我正在使用下面的if/elif循环,为大约100万条观测记录创建半小时时间桶(half-hourly bucket),这需要花费很多时间。以下是我的循环代码：

def half_hourly_buckets(dataframe,time_column):
    """Label each row with the half-hour bucket its time of day falls into.

    This is the row-by-row version from the question: it is deliberately an
    explicit ``if``/``elif`` chain evaluated once per row, which is what makes
    it slow on large frames.

    Args:
        dataframe: Frame to label. Rows are addressed with ``.loc[j, ...]``
            for ``j`` in ``range(len(dataframe))``, so the index labels must
            be ``0 .. len(dataframe) - 1``.
        time_column: Name of a column holding ``'%H:%M:%S'`` strings. The
            column is overwritten with ``datetime.time`` objects.

    Returns:
        The same ``dataframe`` object, with a ``'half_hourly_bucket'`` column
        of labels such as ``"07:00:01 - 07:30:00"``. Buckets are closed on the
        right; anything not matched by an explicit branch (23:30:01-23:59:59
        and 00:00:00) gets ``"23:30:01 - 00:00:00"``.
    """
    dataframe[time_column] = pd.to_datetime(dataframe[time_column],format = '%H:%M:%S').dt.time
    for j in range(len(dataframe)):
        x = dataframe.loc[j,time_column]
        if (x >= datetime.time(0,0,1)) & (x <= datetime.time(0,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "00:00:01 - 00:30:00"
        elif (x >= datetime.time(0,30,1)) & (x <= datetime.time(1,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "00:30:01 - 01:00:00"
        elif (x >= datetime.time(1,0,1)) & (x <= datetime.time(1,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "01:00:01 - 01:30:00"
        elif (x >= datetime.time(1,30,1)) & (x <= datetime.time(2,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "01:30:01 - 02:00:00"
        elif (x >= datetime.time(2,0,1)) & (x <= datetime.time(2,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "02:00:01 - 02:30:00"
        elif (x >= datetime.time(2,30,1)) & (x <= datetime.time(3,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "02:30:01 - 03:00:00"
        elif (x >= datetime.time(3,0,1)) & (x <= datetime.time(3,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "03:00:01 - 03:30:00"
        elif (x >= datetime.time(3,30,1)) & (x <= datetime.time(4,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "03:30:01 - 04:00:00"
        elif (x >= datetime.time(4,0,1)) & (x <= datetime.time(4,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "04:00:01 - 04:30:00"
        elif (x >= datetime.time(4,30,1)) & (x <= datetime.time(5,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "04:30:01 - 05:00:00"
        elif (x >= datetime.time(5,0,1)) & (x <= datetime.time(5,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "05:00:01 - 05:30:00"
        elif (x >= datetime.time(5,30,1)) & (x <= datetime.time(6,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "05:30:01 - 06:00:00"
        elif (x >= datetime.time(6,0,1)) & (x <= datetime.time(6,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "06:00:01 - 06:30:00"
        elif (x >= datetime.time(6,30,1)) & (x <= datetime.time(7,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "06:30:01 - 07:00:00"
        elif (x >= datetime.time(7,0,1)) & (x <= datetime.time(7,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "07:00:01 - 07:30:00"
        elif (x >= datetime.time(7,30,1)) & (x <= datetime.time(8,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "07:30:01 - 08:00:00"
        elif (x >= datetime.time(8,0,1)) & (x <= datetime.time(8,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "08:00:01 - 08:30:00"
        elif (x >= datetime.time(8,30,1)) & (x <= datetime.time(9,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "08:30:01 - 09:00:00"
        elif (x >= datetime.time(9,0,1)) & (x <= datetime.time(9,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "09:00:01 - 09:30:00"
        elif (x >= datetime.time(9,30,1)) & (x <= datetime.time(10,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "09:30:01 - 10:00:00"
        elif (x >= datetime.time(10,0,1)) & (x <= datetime.time(10,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "10:00:01 - 10:30:00"
        elif (x >= datetime.time(10,30,1)) & (x <= datetime.time(11,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "10:30:01 - 11:00:00"
        elif (x >= datetime.time(11,0,1)) & (x <= datetime.time(11,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "11:00:01 - 11:30:00"
        elif (x >= datetime.time(11,30,1)) & (x <= datetime.time(12,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "11:30:01 - 12:00:00"
        elif (x >= datetime.time(12,0,1)) & (x <= datetime.time(12,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "12:00:01 - 12:30:00"
        elif (x >= datetime.time(12,30,1)) & (x <= datetime.time(13,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "12:30:01 - 13:00:00"
        elif (x >= datetime.time(13,0,1)) & (x <= datetime.time(13,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "13:00:01 - 13:30:00"
        elif (x >= datetime.time(13,30,1)) & (x <= datetime.time(14,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "13:30:01 - 14:00:00"
        elif (x >= datetime.time(14,0,1)) & (x <= datetime.time(14,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "14:00:01 - 14:30:00"
        elif (x >= datetime.time(14,30,1)) & (x <= datetime.time(15,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "14:30:01 - 15:00:00"
        elif (x >= datetime.time(15,0,1)) & (x <= datetime.time(15,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "15:00:01 - 15:30:00"
        elif (x >= datetime.time(15,30,1)) & (x <= datetime.time(16,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "15:30:01 - 16:00:00"
        elif (x >= datetime.time(16,0,1)) & (x <= datetime.time(16,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "16:00:01 - 16:30:00"
        elif (x >= datetime.time(16,30,1)) & (x <= datetime.time(17,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "16:30:01 - 17:00:00"
        elif (x >= datetime.time(17,0,1)) & (x <= datetime.time(17,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "17:00:01 - 17:30:00"
        elif (x >= datetime.time(17,30,1)) & (x <= datetime.time(18,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "17:30:01 - 18:00:00"
        elif (x >= datetime.time(18,0,1)) & (x <= datetime.time(18,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "18:00:01 - 18:30:00"
        elif (x >= datetime.time(18,30,1)) & (x <= datetime.time(19,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "18:30:01 - 19:00:00"
        elif (x >= datetime.time(19,0,1)) & (x <= datetime.time(19,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "19:00:01 - 19:30:00"
        elif (x >= datetime.time(19,30,1)) & (x <= datetime.time(20,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "19:30:01 - 20:00:00"
        elif (x >= datetime.time(20,0,1)) & (x <= datetime.time(20,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "20:00:01 - 20:30:00"
        elif (x >= datetime.time(20,30,1)) & (x <= datetime.time(21,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "20:30:01 - 21:00:00"
        elif (x >= datetime.time(21,0,1)) & (x <= datetime.time(21,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "21:00:01 - 21:30:00"
        elif (x >= datetime.time(21,30,1)) & (x <= datetime.time(22,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "21:30:01 - 22:00:00"
        elif (x >= datetime.time(22,0,1)) & (x <= datetime.time(22,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "22:00:01 - 22:30:00"
        elif (x >= datetime.time(22,30,1)) & (x <= datetime.time(23,0,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "22:30:01 - 23:00:00"
        elif (x >= datetime.time(23,0,1)) & (x <= datetime.time(23,30,0)):
            dataframe.loc[j,'half_hourly_bucket'] = "23:00:01 - 23:30:00"
        else:
            # Catches 23:30:01-23:59:59 and midnight itself (00:00:00),
            # which closes the last bucket of the day.
            dataframe.loc[j,'half_hourly_bucket'] = "23:30:01 - 00:00:00"
    return dataframe

有没有办法避免这种循环并提高处理速度？

解决方法

您可以使用Pandas中的矢量化操作快速完成此操作。唯一的技巧是先把datetime.time值转换为Pandas可以直接做运算的时间戳(datetime)。下面这段代码可以在大约4秒内完成全部工作：

import datetime,random
import pandas as pd,numpy as np

# Sample data: 500,000 random times of day. Each row of uniform [0, 1)
# draws is scaled to an (hour, minute, second) triple.
df = pd.DataFrame({
    'time': [
        datetime.time(hour=int(24 * u), minute=int(60 * v), second=int(60 * w))
        for u, v, w in np.random.rand(500000, 3)
    ],
})

def half_hourly_buckets(dataframe,time_column):
    """Add a ``'half_hourly_bucket'`` label column to ``dataframe`` in place.

    Vectorized replacement for a per-row ``if``/``elif`` chain. Buckets are
    half hours closed on the right, e.g. 17:00:01 through 17:30:00 maps to
    ``"17:00:01 - 17:30:00"``; midnight (00:00:00) closes the last bucket of
    the day and maps to ``"23:30:01 - 00:00:00"``.

    Args:
        dataframe: Frame to label; it is modified in place.
        time_column: Name of a column holding ``datetime.time`` objects.

    Returns:
        None. The result is written to ``dataframe['half_hourly_bucket']``.
    """
    # datetime.time supports no arithmetic, so anchor every value on an
    # arbitrary date; only the time-of-day part is ever formatted back out.
    base_date = datetime.datetime(2000,1,1)
    # pd.to_datetime guarantees a datetime64 Series even when the frame is
    # empty (apply() alone would leave a Series without a .dt accessor).
    dt = pd.to_datetime(dataframe[time_column].apply(
        lambda t: datetime.datetime.combine(base_date,t)
    ))
    # assign corresponding bins
    one_second = pd.Timedelta(seconds=1)
    thirty_minutes = pd.Timedelta(minutes=30)
    # Shift back one second before flooring so that an exact boundary such as
    # 17:30:00 lands in the bucket it closes rather than the one it opens.
    # '30min' is the supported spelling; the old '30T' alias is deprecated.
    bucket_start = (dt - one_second).dt.floor('30min')
    dataframe['half_hourly_bucket'] = (
        (bucket_start + one_second).dt.strftime("%H:%M:%S")
        + ' - '
        + (bucket_start + thirty_minutes).dt.strftime("%H:%M:%S")
    )

# Label the sample frame in place, then evaluate it as a bare expression so
# an interactive session echoes the result (sample output below).
half_hourly_buckets(dataframe=df,time_column='time')
df
#             time   half_hourly_bucket
# 0       07:00:49  07:00:01 - 07:30:00
# 1       06:16:19  06:00:01 - 06:30:00
# 2       06:17:06  06:00:01 - 06:30:00
# 3       17:28:31  17:00:01 - 17:30:00
# ...          ...                  ...
# 739     18:00:01  18:00:01 - 18:30:00
# ...          ...                  ...
# 4259    00:00:00  23:30:01 - 00:00:00
# ...          ...                  ...
# 4520    17:30:00  17:00:01 - 17:30:00
# ...          ...                  ...

（编辑：李大同）

【声明】本站内容均来自网络，其相关言论仅代表作者个人观点，不代表本站立场。若无意侵犯到您的权利，请及时联系站长删除相关内容!