diff --git a/docs/source/_templates/_api_ref.pandas.general_functions_templ.rst b/docs/source/_templates/_api_ref.pandas.general_functions_templ.rst new file mode 100644 index 000000000..eb81b9536 --- /dev/null +++ b/docs/source/_templates/_api_ref.pandas.general_functions_templ.rst @@ -0,0 +1,73 @@ +.. _api_ref.pandas.general_functions: +.. include:: ./../ext_links.txt + +General Functions +================= +.. currentmodule:: pandas + +This section covers general Pandas operations + +Data manipulations +~~~~~~~~~~~~~~~~~~ + +.. sdc_toctree +melt +pivot +pivot_table +crosstab +cut +qcut +merge +merge_ordered +merge_asof +concat +get_dummies +factorize +unique +wide_to_long + +Top-Level Missing Data +~~~~~~~~~~~~~~~~~~~~~~ + +.. sdc_toctree +isna +isnull +notna +notnull + +Top-Level Conversions +~~~~~~~~~~~~~~~~~~~~~ + +.. sdc_toctree +to_numeric + +Top-Level Working With Dates & Time +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. sdc_toctree +to_datetime +to_timedelta +date_range +bdate_range +period_range +timedelta_range +infer_freq + +Top-Level Dealing With Intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. sdc_toctree +interval_range + +Top-Level Evaluation +~~~~~~~~~~~~~~~~~~~~ + +.. sdc_toctree + eval + +Hashing +~~~~~~~ + +.. sdc_toctree + util.hash_array + util.hash_pandas_object diff --git a/docs/source/_templates/_api_ref.pandas.groupby_templ.rst b/docs/source/_templates/_api_ref.pandas.groupby_templ.rst new file mode 100644 index 000000000..cd916fefb --- /dev/null +++ b/docs/source/_templates/_api_ref.pandas.groupby_templ.rst @@ -0,0 +1,125 @@ +.. _api_ref.pandas.groupby: +.. include:: ./../ext_links.txt + +GroupBy +======= +.. currentmodule:: pandas.core.groupby + +This section covers operations for grouping data in series and dataframes. + +GroupBy objects are returned by groupby calls: :func:`pandas.DataFrame.groupby`, :func:`pandas.Series.groupby`, etc. + +Indexing and Iteration +---------------------- + +.. 
sdc_toctree +GroupBy.__iter__ +GroupBy.groups +GroupBy.indices +GroupBy.get_group + +.. currentmodule:: pandas + +.. sdc_toctree +Grouper + +.. currentmodule:: pandas.core.groupby + +User-Defined Functions +---------------------- + +.. sdc_toctree +GroupBy.apply +GroupBy.agg +GroupBy.aggregate +GroupBy.transform +GroupBy.pipe + +Computations, Descriptive Statistics +------------------------------------ + +.. sdc_toctree +GroupBy.all +GroupBy.any +GroupBy.bfill +GroupBy.count +GroupBy.cumcount +GroupBy.cummax +GroupBy.cummin +GroupBy.cumprod +GroupBy.cumsum +GroupBy.ffill +GroupBy.first +GroupBy.head +GroupBy.last +GroupBy.max +GroupBy.mean +GroupBy.median +GroupBy.min +GroupBy.ngroup +GroupBy.nth +GroupBy.ohlc +GroupBy.prod +GroupBy.rank +GroupBy.pct_change +GroupBy.size +GroupBy.sem +GroupBy.std +GroupBy.sum +GroupBy.var +GroupBy.tail + +The following methods are available in both ``SeriesGroupBy`` and +``DataFrameGroupBy`` objects, but may differ slightly, usually in that +the ``DataFrameGroupBy`` version usually permits the specification of an +axis argument, and often an argument indicating whether to restrict +application to columns of a specific data type. + +.. 
sdc_toctree +DataFrameGroupBy.all +DataFrameGroupBy.any +DataFrameGroupBy.bfill +DataFrameGroupBy.corr +DataFrameGroupBy.count +DataFrameGroupBy.cov +DataFrameGroupBy.cummax +DataFrameGroupBy.cummin +DataFrameGroupBy.cumprod +DataFrameGroupBy.cumsum +DataFrameGroupBy.describe +DataFrameGroupBy.diff +DataFrameGroupBy.ffill +DataFrameGroupBy.fillna +DataFrameGroupBy.filter +DataFrameGroupBy.hist +DataFrameGroupBy.idxmax +DataFrameGroupBy.idxmin +DataFrameGroupBy.mad +DataFrameGroupBy.nunique +DataFrameGroupBy.pct_change +DataFrameGroupBy.plot +DataFrameGroupBy.quantile +DataFrameGroupBy.rank +DataFrameGroupBy.resample +DataFrameGroupBy.shift +DataFrameGroupBy.size +DataFrameGroupBy.skew +DataFrameGroupBy.take +DataFrameGroupBy.tshift + +The following methods are available only for ``SeriesGroupBy`` objects. + +.. sdc_toctree +SeriesGroupBy.nlargest +SeriesGroupBy.nsmallest +SeriesGroupBy.nunique +SeriesGroupBy.unique +SeriesGroupBy.value_counts +SeriesGroupBy.is_monotonic_increasing +SeriesGroupBy.is_monotonic_decreasing + +The following methods are available only for ``DataFrameGroupBy`` objects. + +.. sdc_toctree +DataFrameGroupBy.corrwith +DataFrameGroupBy.boxplot diff --git a/docs/source/_templates/_api_ref.pandas.io_templ.rst b/docs/source/_templates/_api_ref.pandas.io_templ.rst new file mode 100644 index 000000000..15d453d4c --- /dev/null +++ b/docs/source/_templates/_api_ref.pandas.io_templ.rst @@ -0,0 +1,124 @@ +.. _api_ref.pandas.io: +.. include:: ./../ext_links.txt + +Input-Output +============ +.. currentmodule:: pandas + +This section includes `Pandas*`_ functions for reading data of a specific format into memory and for writing in-memory +data to an external storage format. + + +Pickling +~~~~~~~~ + +.. sdc_toctree +read_pickle + +Flat Files +~~~~~~~~~~ + +.. sdc_toctree +read_table +read_csv +read_fwf + +Clipboard +~~~~~~~~~ + +.. sdc_toctree +read_clipboard + +Excel +~~~~~ + +.. sdc_toctree +read_excel +ExcelFile.parse +ExcelWriter + +JSON +~~~~ + +.. 
sdc_toctree +read_json + +.. currentmodule:: pandas.io.json + +.. sdc_toctree +json_normalize +build_table_schema + +.. currentmodule:: pandas + +HTML +~~~~ + +.. sdc_toctree +read_html + +HDFStore: PyTables (HDF5) +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. sdc_toctree +read_hdf +HDFStore.put +HDFStore.append +HDFStore.get +HDFStore.select +HDFStore.info +HDFStore.keys +HDFStore.groups +HDFStore.walk + +Feather +~~~~~~~ + +.. sdc_toctree +read_feather + +Parquet +~~~~~~~ + +.. sdc_toctree +read_parquet + +SAS +~~~ + +.. sdc_toctree +read_sas + +SPSS +~~~~ + +.. sdc_toctree +read_spss + +SQL +~~~ + +.. sdc_toctree +read_sql_table +read_sql_query +read_sql + +Google BigQuery +~~~~~~~~~~~~~~~ + +.. sdc_toctree +read_gbq + +STATA +~~~~~ + +.. sdc_toctree +read_stata + +.. currentmodule:: pandas.io.stata + +.. sdc_toctree +StataReader.data_label +StataReader.value_labels +StataReader.variable_labels +StataWriter.write_file diff --git a/docs/source/_templates/_api_ref.pandas.window_templ.rst b/docs/source/_templates/_api_ref.pandas.window_templ.rst index fbf6419cd..c0bd8ab6e 100644 --- a/docs/source/_templates/_api_ref.pandas.window_templ.rst +++ b/docs/source/_templates/_api_ref.pandas.window_templ.rst @@ -1,58 +1,79 @@ .. _api_ref.pandas.window: .. include:: ./../ext_links.txt -Pandas Window -============= -.. currentmodule:: pandas.core.window +Rolling Windows +=============== +This section covers a collection of moving windows operations on series and dataframes. + +Standard Moving Window objects are returned by ``.rolling`` calls: +:func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. + +Expanding Moving Window objects are returned by ``.expanding`` calls: +:func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. + +Exponentially-Weighted Moving Window objects are returned by ``.ewm`` calls: +:func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. 
-Standard moving window functions +Standard Moving Window Functions -------------------------------- +.. currentmodule:: pandas.core.window .. sdc_toctree - Rolling.count - Rolling.sum - Rolling.mean - Rolling.median - Rolling.var - Rolling.std - Rolling.min - Rolling.max - Rolling.corr - Rolling.cov - Rolling.skew - Rolling.kurt - Rolling.apply - Rolling.aggregate - Rolling.quantile - Window.mean - Window.sum - -Standard expanding window functions +Rolling.count +Rolling.sum +Rolling.mean +Rolling.median +Rolling.var +Rolling.std +Rolling.min +Rolling.max +Rolling.corr +Rolling.cov +Rolling.skew +Rolling.kurt +Rolling.apply +Rolling.aggregate +Rolling.quantile + +.. currentmodule:: pandas.core.window + +.. sdc_toctree +Window.mean +Window.sum + +..Window.var +..Window.std + +.. _api_ref.pandas.functions_expanding: + +Standard Expanding Window Functions ----------------------------------- +.. currentmodule:: pandas.core.window .. sdc_toctree - Expanding.count - Expanding.sum - Expanding.mean - Expanding.median - Expanding.var - Expanding.std - Expanding.min - Expanding.max - Expanding.corr - Expanding.cov - Expanding.skew - Expanding.kurt - Expanding.apply - Expanding.aggregate - Expanding.quantile - -Exponentially-weighted moving window functions +Expanding.count +Expanding.sum +Expanding.mean +Expanding.median +Expanding.var +Expanding.std +Expanding.min +Expanding.max +Expanding.corr +Expanding.cov +Expanding.skew +Expanding.kurt +Expanding.apply +Expanding.aggregate +Expanding.quantile + +Exponentially-Weighted Moving Window Functions ---------------------------------------------- +.. currentmodule:: pandas.core.window .. sdc_toctree - EWM.mean - EWM.std - EWM.var - EWM.corr - EWM.cov +EWM.mean +EWM.std +EWM.var +EWM.corr +EWM.cov diff --git a/docs/source/apireference.rst b/docs/source/apireference.rst index bf99f3543..c671f9226 100644 --- a/docs/source/apireference.rst +++ b/docs/source/apireference.rst @@ -6,6 +6,9 @@ API Reference .. 
toctree:: :maxdepth: 2 + Input-Output <./_api_ref/api_ref.pandas.io.rst> + General Functions <./_api_ref/api_ref.pandas.general_functions.rst> Series: Columnar Data Structure <./_api_ref/api_ref.pandas.series.rst> Dataframe: Tabular Data Structure <./_api_ref/api_ref.pandas.dataframe.rst> - Window <./_api_ref/api_ref.pandas.window.rst> + Rolling Window Functions <./_api_ref/api_ref.pandas.window.rst> + GroupBy: Grouping Data <./_api_ref/api_ref.pandas.groupby.rst> diff --git a/docs/source/buildscripts/apiref_generator.py b/docs/source/buildscripts/apiref_generator.py index 080e85787..2072acc19 100644 --- a/docs/source/buildscripts/apiref_generator.py +++ b/docs/source/buildscripts/apiref_generator.py @@ -25,7 +25,6 @@ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** -import pandas from sdc_object_utils import init_pandas_structure, init_sdc_structure, init_pandas_sdc_dict from sdc_object_utils import get_sdc_object_by_pandas_name, get_obj from sdc_object_utils import get_class_methods, get_class_attributes, get_fully_qualified_name @@ -35,9 +34,12 @@ APIREF_TEMPLATE_FNAMES = [ + './_templates/_api_ref.pandas.io_templ.rst', './_templates/_api_ref.pandas.series_templ.rst', './_templates/_api_ref.pandas.dataframe_templ.rst', + './_templates/_api_ref.pandas.general_functions_templ.rst', './_templates/_api_ref.pandas.window_templ.rst', + './_templates/_api_ref.pandas.groupby_templ.rst', ] @@ -52,14 +54,28 @@ def reformat(text): :param text: Original text with warnings :return: Modified text that fixes warnings """ + text = reformat_multiline_inline_literal(text) + text = reformat_reindent_code_block(text) + text = reformat_perceived_reference(text) text = reformat_replace_star_list_with_dash_list(text) # Must be called before :func:`reformat_asterisks` text = reformat_asterisks(text) # Fix for * and ** symbols text = reformat_explicit_markup(text) # Fix for explicit markup without a 
def reformat_perceived_reference(text):
    """
    Removes the trailing underscore from every ``csv.QUOTE_`` occurrence.

    In reStructuredText a word immediately followed by an underscore is parsed as a hyperlink reference,
    so the prefix ``csv.QUOTE_`` is perceived as a reference to an undefined target. Only this exact
    prefix is rewritten (to ``csv.QUOTE``); the rest of the text is returned untouched.

    :param text: Original text with warnings
    :return: Modified text that fixes warnings
    """
    return text.replace('csv.QUOTE_', 'csv.QUOTE')


def reformat_multiline_inline_literal(text):
    """
    Fixes warning with multi-line inline literal when the opening double back-quote is on one line and
    the closing double back-quote is on another one.

    Double back-quote delimiters are paired left to right. Every newline character found between an
    opening delimiter and its closing delimiter is removed (it is not replaced with a space), which
    unwraps the literal into a single line regardless of how many lines it spanned. Text after an
    opening delimiter that has no closing delimiter is copied as is.

    :param text: Original text with warnings
    :return: Modified text with fixed warnings
    """
    parts = text.split('``')

    # Odd-indexed parts sit between an opening and a closing delimiter. The last part is never followed
    # by a delimiter, so it is excluded: it is either plain text or the tail of an unclosed literal.
    for i in range(1, len(parts) - 1, 2):
        parts[i] = parts[i].replace('\n', '')

    return '``'.join(parts)


def reformat_reindent_code_block(text):
    """
    Fixes warnings related to un-indented code blocks starting with ::

    The text is split into paragraphs on blank lines. For every paragraph that ends with `` ::`` the
    paragraph that follows it is passed to :func:`reindent` with the indent of the ``::`` line plus 4.
    Every emitted paragraph, including the last one, is terminated with a blank line.

    :param text: Original text with warnings
    :return: Modified text with fixed warnings
    """
    paragraphs = text.split('\n\n')
    chunks = []
    pos = 0
    while pos < len(paragraphs):
        paragraph = paragraphs[pos]
        chunks.append(paragraph)
        pos += 1

        if paragraph.endswith(' ::') and pos < len(paragraphs):
            # The paragraph right after the ``::`` marker is the code block body. It is consumed here
            # and therefore never examined as a potential ``::`` paragraph itself.
            marker_line = paragraph.split('\n')[-1]
            chunks.append(reindent(paragraphs[pos], get_indent(marker_line) + 4))
            pos += 1

    return ''.join(chunk + '\n\n' for chunk in chunks)


def reformat_bullet_list_add_blank_line(text):
    """
    Fixes the warning caused by situation when the new bullet list does not start with a blank line

    A bullet list block starts at a line whose stripped content begins with ``- `` and lasts up to and
    including the next blank line (or the end of the text). A blank line is inserted in front of every
    such block, even when one is already present. Every output line is terminated with a newline.

    :param text: Original text
    :return: Reformatted text with added blank lines before bullet list blocks
    """
    out = []
    inside_list = False
    for line in text.split('\n'):
        stripped = line.strip()
        if inside_list:
            out.append(line)
            if not stripped:
                # Blank line closes the current bullet list block
                inside_list = False
        elif stripped.startswith('- '):
            out.append('')  # Blank line at the block beginning
            out.append(line)
            inside_list = True
        else:
            out.append(line)

    return ''.join(item + '\n' for item in out)
def _reformat_parameters_sub_indent(title, text):
    """
    Internal function. Returns markup for a Parameters section in which the description of every
    parameter is indented deeper than the line that introduces the parameter.

    The first line of each entry is passed as is to ``_get_param_text``. The lines that follow are
    attached to that entry while they are blank or indented deeper than the section itself; the first
    non-blank line at the section indent (or shallower) starts the next entry.

    :param title: Section title, forwarded to ``_get_param_text``
    :param text: Section body
    :return: Reformatted section text
    """
    rows = text.split('\n')
    total = len(rows)
    # NOTE(review): get_indent is defined elsewhere; assumed to return the leading indent width
    base_indent = get_indent(text)

    result = ''
    pos = 0
    while pos < total:
        # First line of the entry introduces the parameter
        header = rows[pos]
        pos += 1

        body = ''
        while pos < total:
            row = rows[pos]
            if not row.strip():
                # Blank line either ends the description or separates paragraphs/lists inside it
                body += '\n'
            elif get_indent(row) > base_indent:
                # Deeper indented line continues the multi-line description
                body += row + '\n'
            else:
                # Next parameter begins without a separating blank line
                break
            pos += 1

        result += _get_param_text(title, header) + '\n' + body + '\n'
    return result
Returns correct markup for Raises section def _reformat_raises(title, text): lines = text.split('\n') @@ -385,7 +559,7 @@ def _reformat_returns(title, text): return new_text + '\n' if title.strip() == 'Parameters': - return _reformat_parameters(title, text) + return _reformat_parameters_sub_indent(title, text) elif title.strip() == 'Returns' or title.strip() == 'Return': return _reformat_returns(title, text) elif title.strip() == 'Raises': @@ -442,7 +616,7 @@ def generate_simple_object_doc(pandas_name, short_doc_flag=False, doc_from_panda elif title.strip() == 'References': # Exclude References section (may be too specific to Pandas) sections.pop(0) elif title.strip() == 'Parameters' or title.strip() == 'Raises' or title.strip() == 'Return' or \ - title.strip() == 'Returns': + title.strip() == 'Returns' or title.strip() == 'Yields': if reformat_pandas: doc += reformat_pandas_params(title, text) sections.pop(0) @@ -605,7 +779,7 @@ def parse_templ_rst(fname_templ): # Parsing lines until ``.. sdc_toctree`` section is met while len(doc) > 0 and not doc[0].startswith('.. sdc_toctree'): line = doc[0] - if line.startswith('.. currentmodule::'): + if line.strip().startswith('.. 
currentmodule::'): current_module_name = line[19:].strip() fout.write(line) doc.pop(0) diff --git a/docs/source/buildscripts/sdc_build_doc.py b/docs/source/buildscripts/sdc_build_doc.py index f9656088d..4da9d977d 100644 --- a/docs/source/buildscripts/sdc_build_doc.py +++ b/docs/source/buildscripts/sdc_build_doc.py @@ -70,14 +70,3 @@ def __init__(self, dist): self._remove_cwd_from_syspath() self.sdc_build_doc_command = BuildDoc(dist) self.sdc_build_doc_command.initialize_options() - - -# Sphinx Developer's Documentation Build - -#class build_devdoc(build.build): -# description = "Build developer's documentation" -# -# def run(self): -# spawn(['rm', '-rf', 'docs/_builddev']) -# spawn(['sphinx-build', '-b', 'html', '-d', 'docs/_builddev/docstrees', -# '-j1', 'docs/devsource', '-t', 'developer', 'docs/_builddev/html']) diff --git a/setup.py b/setup.py index 9da791535..b4cb7f3c0 100644 --- a/setup.py +++ b/setup.py @@ -179,6 +179,9 @@ def readme(): str_libs = np_compile_args['libraries'] +if not is_win: + str_libs += ['boost_regex'] + ext_str = Extension(name="sdc.hstr_ext", sources=["sdc/_str_ext.cpp"], libraries=str_libs,