Merge pull request #887 from bashtage/final-fixes

bashtage · web-flow · commit 6846ba19cc06 · 2021-07-13T11:14:24.000+01:00
MAINT: Final fixes
diff --git a/pandas_datareader/_utils.py b/pandas_datareader/_utils.py
@@ -53,8 +53,11 @@ def _sanitize_dates(start, end):
     return start, end
 
 
-def _init_session(session, retry_count=3):
+def _init_session(session):
     if session is None:
         session = requests.Session()
         # do not set requests max_retries here to support arbitrary pause
+    else:
+        if not isinstance(session, requests.Session):
+            raise TypeError("session must be a request.Session")
     return session
diff --git a/pandas_datareader/base.py b/pandas_datareader/base.py
@@ -70,7 +70,7 @@ def __init__(
         self.pause = pause
         self.timeout = timeout
         self.pause_multiplier = 1
-        self.session = _init_session(session, retry_count)
+        self.session = _init_session(session)
         self.freq = freq
         self.headers = None
 
@@ -148,11 +148,7 @@ def _get_response(self, url, params=None, headers=None):
         params : dict or None
             parameters passed to the URL
         """
-
-        # Use default headers if not passes and not using a user session
-        if headers is None:
-            headers = self.headers
-
+        headers = headers or self.headers
         pause = self.pause
         last_response_text = ""
         for _ in range(self.retry_count + 1):
diff --git a/pandas_datareader/compat/__init__.py b/pandas_datareader/compat/__init__.py
@@ -39,11 +39,16 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=None):
     # for "get_filepath_or_buffer" starting in pandas >= 0.20.0
     if isinstance(filepath_or_buffer, dict):
         return filepath_or_buffer, encoding, compression
-
-    tmp = com._get_filepath_or_buffer(
-        filepath_or_buffer, encoding=encoding, compression=None
-    )
-    return tmp.filepath_or_buffer, tmp.encoding, tmp.compression
+    try:
+        tmp = com._get_filepath_or_buffer(
+            filepath_or_buffer, encoding=encoding, compression=None
+        )
+        return tmp.filepath_or_buffer, tmp.encoding, tmp.compression
+    except AttributeError:
+        tmp = com.get_filepath_or_buffer(
+            filepath_or_buffer, encoding=encoding, compression=None
+        )
+        return tmp
 
 
 string_types = (str,)
diff --git a/pandas_datareader/naver.py b/pandas_datareader/naver.py
@@ -35,7 +35,7 @@ def __init__(
         if not isinstance(symbols, string_types):
             raise NotImplementedError("Bulk-fetching is not implemented")
 
-        super(NaverDailyReader, self).__init__(
+        super().__init__(
             symbols=symbols,
             start=start,
             end=end,
diff --git a/pandas_datareader/tests/test_econdb.py b/pandas_datareader/tests/test_econdb.py
@@ -58,7 +58,7 @@ def test_get_tourism(self):
             start=pd.Timestamp("2008-01-01"),
             end=pd.Timestamp("2012-01-01"),
         )
-        df = df.astype(np.float)
+        df = df.astype(float)
         jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)
         us = np.array(
             [175702304, 160507424, 164079728, 167600272, 171320416], dtype=float
diff --git a/pandas_datareader/tests/test_famafrench.py b/pandas_datareader/tests/test_famafrench.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 from pandas import testing as tm
 import pytest
@@ -48,49 +49,49 @@ def test_f_f_research(self):
             {
                 "Mkt-RF": [
                     -3.36,
-                    3.4,
+                    3.40,
                     6.31,
-                    2.0,
+                    2.00,
                     -7.89,
-                    -5.56,
+                    -5.57,
                     6.93,
                     -4.77,
                     9.54,
                     3.88,
-                    0.6,
+                    0.60,
                     6.82,
                 ],
                 "SMB": [
-                    0.38,
-                    1.2,
-                    1.42,
-                    4.98,
-                    0.05,
-                    -1.97,
-                    0.16,
-                    -3.00,
-                    3.92,
-                    1.15,
-                    3.70,
-                    0.7,
+                    0.37,
+                    1.19,
+                    1.44,
+                    4.86,
+                    0.14,
+                    -1.84,
+                    0.18,
+                    -3.02,
+                    3.93,
+                    1.07,
+                    3.78,
+                    0.68,
                 ],
                 "HML": [
-                    0.31,
-                    3.16,
-                    2.1,
-                    2.81,
-                    -2.38,
-                    -4.5,
-                    -0.27,
-                    -1.95,
-                    -3.12,
-                    -2.59,
-                    -0.9,
-                    3.81,
+                    0.33,
+                    3.19,
+                    2.11,
+                    2.91,
+                    -2.39,
+                    -4.52,
+                    -0.36,
+                    -1.90,
+                    -3.23,
+                    -2.46,
+                    -0.95,
+                    3.64,
                 ],
                 "RF": [
-                    0.0,
-                    0.0,
+                    0.00,
+                    0.00,
                     0.01,
                     0.01,
                     0.01,
@@ -106,7 +107,10 @@ def test_f_f_research(self):
             index=pd.period_range("2010-01-01", "2010-12-01", freq="M", name="Date"),
             columns=["Mkt-RF", "SMB", "HML", "RF"],
         )
-        tm.assert_frame_equal(results[0], exp, check_less_precise=0)
+        received = results[0]
+        np.testing.assert_allclose(received, exp)
+        tm.assert_index_equal(received.index, exp.index)
+        tm.assert_index_equal(received.columns, exp.columns)
 
     def test_me_breakpoints(self):
         results = web.DataReader(
diff --git a/pandas_datareader/tests/test_fred.py b/pandas_datareader/tests/test_fred.py
@@ -72,7 +72,9 @@ def test_fred_multi(self):  # pragma: no cover
         )
         expected.index.rename("DATE", inplace=True)
         expected.index.freq = "MS"
-        tm.assert_frame_equal(received, expected, check_less_precise=True)
+        np.testing.assert_allclose(received, expected)
+        tm.assert_index_equal(received.index, expected.index)
+        tm.assert_index_equal(received.columns, expected.columns)
 
     def test_fred_multi_bad_series(self):
         names = ["NOTAREALSERIES", "CPIAUCSL", "ALSO FAKE"]
diff --git a/pandas_datareader/tests/yahoo/test_options.py b/pandas_datareader/tests/yahoo/test_options.py
@@ -9,7 +9,7 @@
 from pandas_datareader import data as web
 
 
-@pytest.yield_fixture
+@pytest.fixture
 def aapl():
     aapl = web.Options("aapl", "yahoo")
     yield aapl
diff --git a/pandas_datareader/tests/yahoo/test_yahoo.py b/pandas_datareader/tests/yahoo/test_yahoo.py
@@ -159,7 +159,7 @@ def test_get_data_null_as_missing_data(self, adj_pr):
         else:
             floats.append("Adj Close")
 
-        assert result[floats].dtypes.all() == np.floating
+        assert result[floats].dtypes.all() == np.float64
 
     @skip_on_exception(RemoteDataError)
     def test_get_data_multiple_symbols_two_dates(self):
@@ -168,7 +168,7 @@ def test_get_data_multiple_symbols_two_dates(self):
         assert result.size == 3
 
         # sanity checking
-        assert result.dtypes == np.floating
+        assert result.dtypes == np.float64
 
         expected = np.array(
             [
@@ -207,12 +207,12 @@ def test_get_data_yahoo_actions(self):
         assert actions.loc["2005-02-28", "value"][0] == 1 / 2.0
 
         assert actions.loc["1995-11-21", "action"][0] == "DIVIDEND"
-        assert round(actions.loc["1995-11-21", "value"][0], 3) == 0.120
+        assert round(actions.loc["1995-11-21", "value"][0], 3) == 0.030
 
         actions = web.get_data_yahoo_actions("AAPL", start, end, adjust_dividends=True)
 
         assert actions.loc["1995-11-21", "action"][0] == "DIVIDEND"
-        assert round(actions.loc["1995-11-21", "value"][0], 4) == 0.0043
+        assert round(actions.loc["1995-11-21", "value"][0], 4) == 0.0011
 
     def test_get_data_yahoo_actions_invalid_symbol(self):
         start = datetime(1990, 1, 1)
@@ -226,14 +226,14 @@ def test_yahoo_reader_class(self):
         r = YahooDailyReader("GOOG", start="JAN-01-2015")
         df = r.read()
 
-        assert df.Volume.loc["JAN-02-2015"] == 1447500
+        assert df.Volume.loc["JAN-02-2015"] == 1447563
 
         session = requests.Session()
 
         r = YahooDailyReader("GOOG", session=session)
         assert r.session is session
 
-    def test_yahoo_DataReader(self):
+    def test_yahoo_datareader(self):
         start = datetime(2010, 1, 1)
         end = datetime(2015, 5, 9)
         # yahoo will adjust for dividends by default
@@ -275,19 +275,19 @@ def test_yahoo_DataReader(self):
                     "DIVIDEND",
                 ],
                 "value": [
-                    0.52,
-                    0.47,
-                    0.47,
-                    0.47,
-                    0.14285714,
-                    0.47,
-                    0.43571,
-                    0.43571,
-                    0.43571,
-                    0.43571,
-                    0.37857,
-                    0.37857,
-                    0.37857,
+                    0.130000,
+                    0.117500,
+                    0.117500,
+                    0.117500,
+                    0.142857,
+                    0.117500,
+                    0.108929,
+                    0.108929,
+                    0.108929,
+                    0.108929,
+                    0.094643,
+                    0.094643,
+                    0.094643,
                 ],
             },
             index=exp_idx,
@@ -316,19 +316,19 @@ def test_yahoo_DataReader(self):
                     "DIVIDEND",
                 ],
                 "value": [
-                    0.52,
-                    0.47,
-                    0.47,
-                    0.47,
-                    0.14285714,
-                    3.29,
-                    3.05,
-                    3.05,
-                    3.05,
-                    3.05,
-                    2.65,
-                    2.65,
-                    2.65,
+                    0.1300,
+                    0.1175,
+                    0.1175,
+                    0.1175,
+                    0.1429,
+                    0.8225,
+                    0.7625,
+                    0.7625,
+                    0.7625,
+                    0.7625,
+                    0.6625,
+                    0.6625,
+                    0.6625,
                 ],
             },
             index=exp_idx,
@@ -344,13 +344,13 @@ def test_yahoo_DataReader(self):
         result = web.DataReader("NTR", "yahoo-actions", start, end)
 
         exp_idx = pd.DatetimeIndex(
-            ["2018-12-28", "2018-09-27", "2018-06-28", "2018-03-28", "2018-01-02"]
+            ["2018-12-28", "2018-09-27", "2018-06-28", "2018-03-28"]
         )
 
         exp = pd.DataFrame(
             {
-                "action": ["DIVIDEND", "DIVIDEND", "DIVIDEND", "DIVIDEND", "SPLIT"],
-                "value": [0.43, 0.40, 0.40, 0.40, 1.00],
+                "action": ["DIVIDEND", "DIVIDEND", "DIVIDEND", "DIVIDEND"],
+                "value": [0.43, 0.40, 0.40, 0.40],
             },
             index=exp_idx,
         )
diff --git a/pandas_datareader/yahoo/_headers.py b/pandas_datareader/yahoo/_headers.py
@@ -0,0 +1,13 @@
+"""
+Default header
+"""
+DEFAULT_HEADERS = {
+    "Connection": "keep-alive",
+    "Expires": str(-1),
+    "Upgrade-Insecure-Requests": str(1),
+    # Google Chrome:
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    ),
+}
diff --git a/pandas_datareader/yahoo/actions.py b/pandas_datareader/yahoo/actions.py
@@ -12,7 +12,7 @@ class YahooActionReader(YahooDailyReader):
     """
 
     def read(self):
-        data = super(YahooActionReader, self).read()
+        data = super().read()
         actions = {}
         if isinstance(data.columns, MultiIndex):
             data = data.swaplevel(0, 1, axis=1)
diff --git a/pandas_datareader/yahoo/daily.py b/pandas_datareader/yahoo/daily.py
diff --git a/pandas_datareader/yahoo/quotes.py b/pandas_datareader/yahoo/quotes.py
diff --git a/setup.cfg b/setup.cfg

Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@ def test_get_tourism(self):`
`58`	`58`	`start=pd.Timestamp("2008-01-01"),`
`59`	`59`	`end=pd.Timestamp("2012-01-01"),`
`60`	`60`	`)`
`61`		`- df = df.astype(np.float)`
	`61`	`+ df = df.astype(float)`
`62`	`62`	`jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)`
`63`	`63`	`us = np.array(`
`64`	`64`	`[175702304, 160507424, 164079728, 167600272, 171320416], dtype=float`