AttributeError: 'numpy.int64' object has no attribute 'to_timestamp'
13,334
Apparently the index of your DataFrame is not a pandas.PeriodIndex. Instead, the index appears to hold integers. The code that you posted requires the index of the data frame to be a PeriodIndex. E.g.:
In [36]: df
Out[36]:
a b
2012-01 1.457900 7.084201
2012-02 1.775861 6.448277
2012-03 1.069051 7.861898
In [37]: df.index
Out[37]: PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='period[M]', freq='M')
When the index is the correct type, the following code (similar to the line in the code you posted) works:
In [39]: dates = [zzz.to_timestamp().date() for zzz in df.index]
In [40]: dates
Out[40]:
[datetime.date(2012, 1, 1),
datetime.date(2012, 2, 1),
datetime.date(2012, 3, 1)]
Author by
liv2hak
Updated on June 04, 2022
Comments
-
liv2hak almost 2 years
I am trying to plot a time series from a python data frame. The code is below.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, YearLocator, MonthLocator

plt.style.use('ggplot')


def plot(df, filename, heading=None):
    """Draw every column of *df* as a line over time and save the figure.

    Each index entry is converted with ``.to_timestamp().date()``, so the
    entries of the index must provide ``to_timestamp()``.

    Args:
        df: Data frame whose columns are plotted one line each.
        filename: Target passed to ``fig.savefig``.
        heading: Optional figure title; nothing is drawn when falsy.
    """
    fig, ax = plt.subplots(figsize=(8, 4))

    min_date = max_date = None
    for col_name in df.columns.values:
        # Take the column and discard its missing values.
        col = df[col_name]
        col = col[col.notnull()]

        # Convert the index entries to plain dates for the x axis.
        dates = [zzz.to_timestamp().date() for zzz in col.index]
        ax.plot_date(
            x=dates,
            y=col,
            fmt='-',
            label=col_name,
            tz=None,
            xdate=True,
            ydate=False,
            linewidth=1.5,
        )

        # Widen the overall date range to include this column.
        earliest = min(dates)
        min_date = min(min_date, earliest) if min_date else earliest
        latest = max(dates)
        max_date = max(max_date, latest) if max_date else latest

    # Pad both ends of the x axis by 3% of the covered span (whole days).
    span = max_date - min_date
    extra = int(span.days * 0.03) * datetime.timedelta(days=1)
    ax.set_xlim([min_date - extra, max_date + extra])

    # Years as major ticks, every second month as minor ticks.
    xaxis = ax.xaxis
    xaxis.set_major_formatter(DateFormatter('%Y'))
    xaxis.set_minor_formatter(DateFormatter('\n%b'))
    xaxis.set_major_locator(YearLocator())
    xaxis.set_minor_locator(MonthLocator(bymonthday=1, interval=2))

    # Grid, legend and y-axis label.
    ax.grid(True)
    ax.legend(loc='best', prop={'size': 'x-small'})
    ax.set_ylabel('Percent')

    # Optional heading.
    if heading:
        fig.suptitle(heading, fontsize=12)
    fig.tight_layout(pad=1.5)

    # Footnote in the bottom-right corner.
    fig.text(
        0.99,
        0.01,
        'nse-timeseries-plot',
        ha='right',
        va='bottom',
        fontsize=8,
        color='#999999',
    )

    # Write the figure to disk.
    fig.savefig(filename, dpi=125)


url = "https://www.google.com/finance/historical?cid=207437&startdate=Jan%201%2C%201971&enddate=Jul%201%2C%202017&start={0}&num=30"
how_many_pages = 138
start = 0

# Download the history 30 rows per page and stack the pages into one frame.
for _ in range(how_many_pages):
    page = requests.get(url.format(start))
    soup = BeautifulSoup(page.content, "lxml")
    table = soup.find_all('table', class_='gf-table historical_price')[0]

    # The first table row holds the headers; the rest are data rows.
    table_rows = table.findAll('tr')
    columns_header = [th.getText() for th in table_rows[0].findAll('th')]
    data = [
        [td.getText() for td in row.findAll(['td'])]
        for row in table_rows[1:]
    ]

    # No index is passed here and concat is not told to rebuild one.
    page_df = pd.DataFrame(data, columns=columns_header)
    if start == 0:
        final_df = page_df
    else:
        final_df = pd.concat([final_df, page_df], axis=0)
    start += 30

final_df.to_csv('nse_data.csv', sep='\t', encoding='utf-8')
plot(final_df, 'nsetsplot')
When I run the code I get the error
AttributeError: 'numpy.int64' object has no attribute 'to_timestamp'
when I do
dates = [zzz.to_timestamp().date() for zzz in col.index]
I am using Anaconda 64-bit on Windows 7 (x86_64)