...
 
Commits (3)
notebook/.ipynb_checkpoints/
"Month","CO2 (ppm) mauna loa, 1965-1980"
"1965-01",319.32
"1965-02",320.36
"1965-03",320.82
"1965-04",322.06
"1965-05",322.17
"1965-06",321.95
"1965-07",321.20
"1965-08",318.81
"1965-09",317.82
"1965-10",317.37
"1965-11",318.93
"1965-12",319.09
"1966-01",319.94
"1966-02",320.98
"1966-03",321.81
"1966-04",323.03
"1966-05",323.36
"1966-06",323.11
"1966-07",321.65
"1966-08",319.64
"1966-09",317.86
"1966-10",317.25
"1966-11",319.06
"1966-12",320.26
"1967-01",321.65
"1967-02",321.81
"1967-03",322.36
"1967-04",323.67
"1967-05",324.17
"1967-06",323.39
"1967-07",321.93
"1967-08",320.29
"1967-09",318.58
"1967-10",318.60
"1967-11",319.98
"1967-12",321.25
"1968-01",321.88
"1968-02",322.47
"1968-03",323.17
"1968-04",324.23
"1968-05",324.88
"1968-06",324.75
"1968-07",323.47
"1968-08",321.34
"1968-09",319.56
"1968-10",319.45
"1968-11",320.45
"1968-12",321.92
"1969-01",323.40
"1969-02",324.21
"1969-03",325.33
"1969-04",326.31
"1969-05",327.01
"1969-06",326.24
"1969-07",325.37
"1969-08",323.12
"1969-09",321.85
"1969-10",321.31
"1969-11",322.31
"1969-12",323.72
"1970-01",324.60
"1970-02",325.57
"1970-03",326.55
"1970-04",327.80
"1970-05",327.80
"1970-06",327.54
"1970-07",326.28
"1970-08",324.63
"1970-09",323.12
"1970-10",323.11
"1970-11",323.99
"1970-12",325.09
"1971-01",326.12
"1971-02",326.61
"1971-03",327.16
"1971-04",327.92
"1971-05",329.14
"1971-06",328.80
"1971-07",327.52
"1971-08",325.62
"1971-09",323.61
"1971-10",323.80
"1971-11",325.10
"1971-12",326.25
"1972-01",326.93
"1972-02",327.83
"1972-03",327.95
"1972-04",329.91
"1972-05",330.22
"1972-06",329.25
"1972-07",328.11
"1972-08",326.39
"1972-09",324.97
"1972-10",325.32
"1972-11",326.54
"1972-12",327.71
"1973-01",328.73
"1973-02",329.69
"1973-03",330.47
"1973-04",331.69
"1973-05",332.65
"1973-06",332.24
"1973-07",331.03
"1973-08",329.36
"1973-09",327.60
"1973-10",327.29
"1973-11",328.28
"1973-12",328.79
"1974-01",329.45
"1974-02",330.89
"1974-03",331.63
"1974-04",332.85
"1974-05",333.28
"1974-06",332.47
"1974-07",331.34
"1974-08",329.53
"1974-09",327.57
"1974-10",327.57
"1974-11",328.53
"1974-12",329.69
"1975-01",330.45
"1975-02",330.97
"1975-03",331.64
"1975-04",332.87
"1975-05",333.61
"1975-06",333.55
"1975-07",331.90
"1975-08",330.05
"1975-09",328.58
"1975-10",328.31
"1975-11",329.41
"1975-12",330.63
"1976-01",331.63
"1976-02",332.46
"1976-03",333.36
"1976-04",334.45
"1976-05",334.82
"1976-06",334.32
"1976-07",333.05
"1976-08",330.87
"1976-09",329.24
"1976-10",328.87
"1976-11",330.18
"1976-12",331.50
"1977-01",332.81
"1977-02",333.23
"1977-03",334.55
"1977-04",335.82
"1977-05",336.44
"1977-06",335.99
"1977-07",334.65
"1977-08",332.41
"1977-09",331.32
"1977-10",330.73
"1977-11",332.05
"1977-12",333.53
"1978-01",334.66
"1978-02",335.07
"1978-03",336.33
"1978-04",337.39
"1978-05",337.65
"1978-06",337.57
"1978-07",336.25
"1978-08",334.39
"1978-09",332.44
"1978-10",332.25
"1978-11",333.59
"1978-12",334.76
"1979-01",335.89
"1979-02",336.44
"1979-03",337.63
"1979-04",338.54
"1979-05",339.06
"1979-06",338.95
"1979-07",337.41
"1979-08",335.71
"1979-09",333.68
"1979-10",333.69
"1979-11",335.05
"1979-12",336.53
"1980-01",337.81
"1980-02",338.16
"1980-03",339.88
"1980-04",340.57
"1980-05",341.19
"1980-06",340.87
"1980-07",339.25
"1980-08",337.19
"1980-09",335.49
"1980-10",336.63
"1980-11",337.74
"1980-12",338.36
CO2 (ppm) mauna loa, 1965-1980
This diff is collapsed.
"Month","Sales of shampoo over a three year period"
"1-01",266.0
"1-02",145.9
"1-03",183.1
"1-04",119.3
"1-05",180.3
"1-06",168.5
"1-07",231.8
"1-08",224.5
"1-09",192.8
"1-10",122.9
"1-11",336.5
"1-12",185.9
"2-01",194.3
"2-02",149.5
"2-03",210.1
"2-04",273.3
"2-05",191.4
"2-06",287.0
"2-07",226.0
"2-08",303.6
"2-09",289.9
"2-10",421.6
"2-11",264.5
"2-12",342.3
"3-01",339.7
"3-02",440.4
"3-03",315.9
"3-04",439.3
"3-05",401.3
"3-06",437.4
"3-07",575.5
"3-08",407.6
"3-09",682.0
"3-10",475.3
"3-11",581.3
"3-12",646.9
Sales of shampoo over a three year period
"Year","Wolfer sunspot numbers, 1770 to 1869"
"1770",101
"1771",82
"1772",67
"1773",35
"1774",31
"1775",7
"1776",20
"1777",93
"1778",154
"1779",126
"1780",85
"1781",68
"1782",39
"1783",23
"1784",10
"1785",24
"1786",83
"1787",132
"1788",131
"1789",118
"1790",90
"1791",67
"1792",60
"1793",47
"1794",41
"1795",21
"1796",16
"1797",6
"1798",4
"1799",7
"1800",15
"1801",34
"1802",45
"1803",43
"1804",48
"1805",42
"1806",28
"1807",10
"1808",8
"1809",3
"1810",0
"1811",1
"1812",5
"1813",12
"1814",14
"1815",35
"1816",46
"1817",41
"1818",30
"1819",24
"1820",16
"1821",7
"1822",4
"1823",2
"1824",9
"1825",17
"1826",36
"1827",50
"1828",64
"1829",67
"1830",71
"1831",48
"1832",28
"1833",9
"1834",13
"1835",57
"1836",122
"1837",138
"1838",103
"1839",86
"1840",65
"1841",37
"1842",24
"1843",11
"1844",15
"1845",40
"1846",62
"1847",99
"1848",125
"1849",96
"1850",67
"1851",65
"1852",54
"1853",39
"1854",21
"1855",7
"1856",4
"1857",23
"1858",55
"1859",94
"1860",96
"1861",77
"1862",59
"1863",44
"1864",47
"1865",31
"1866",16
"1867",7
"1868",38
"1869",74
Wolfer sunspot numbers, 1770 to 1869
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Time Series Modelling\n",
"\n",
"Select at least two appropriate [time series datasets](https://datamarket.com/data/list/?q=provider:tsdl) and compare the following three types of Neural Networks:\n",
"\n",
" * convolutional neural networks\n",
" * basic recurrent neural networks\n",
" * gated recurrent neural networks ([Long Short-Term Memory (LSTM)](https://colah.github.io/posts/2015-08-Understanding-LSTMs/) or [Gated Recurrent Unit (GRU)](https://en.wikipedia.org/wiki/Gated_recurrent_unit))\n",
"\n",
"Make sure the datasets are large enough for the networks to train properly and use training, validation and testing splits to measure generalization."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Links\n",
"\n",
"* [A Guide for Time Series Prediction Using Recurrent Neural Networks (LSTMs)](https://blog.statsbot.co/time-series-prediction-using-recurrent-neural-networks-lstms-807fa6ca7f)\n",
"* [How (not) to use Machine Learning for time series forecasting: Avoiding the pitfalls](https://towardsdatascience.com/how-not-to-use-machine-learning-for-time-series-forecasting-avoiding-the-pitfalls-19f9d7adf424)\n",
"* [Time-Series Analysis Using Recurrent Neural Networks in Tensorflow](https://medium.com/themlblog/time-series-analysis-using-recurrent-neural-networks-in-tensorflow-2a0478b00be7)\n",
"* [Time Series Forecasting with the Long Short-Term Memory Network in Python](https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notes\n",
"\n",
"Investigate whether the neural networks should have a single input and output, multiple inputs and one output or multiple inputs and multiple outputs. Trying out multiple options isn't out of question either."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import pandas\n",
"import pathlib"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_dir_path = pathlib.Path('..') / 'data'\n",
"\n",
"raw_data_paths = {\n",
" # https://datamarket.com/data/set/22v1/\n",
" 'co2': data_dir_path / 'co2-ppm-mauna-loa-19651980.csv',\n",
" \n",
" # https://datamarket.com/data/set/22pw/\n",
" 'lake': data_dir_path / 'monthly-lake-erie-levels-1921-19.csv',\n",
" \n",
" # https://datamarket.com/data/set/22r0/\n",
" 'shampoo': data_dir_path / 'sales-of-shampoo-over-a-three-ye.csv',\n",
" \n",
" # https://datamarket.com/data/set/22wh/\n",
" 'sunspot': data_dir_path / 'wolfer-sunspot-numbers-1770-to-1.csv',\n",
"}\n",
"\n",
"for path in raw_data_paths.values():\n",
" assert(path.exists())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = {}\n",
"for key in raw_data_paths.keys():\n",
" # Including a validation dataset is an option too.\n",
" data[key] = {\n",
" 'training': None,\n",
" 'testing': None,\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def to_stationary(time_series):\n",
" stationary = time_series.diff()\n",
" return stationary.fillna(0)\n",
"\n",
"\n",
"def split_data(series):\n",
" \"\"\"Split data into training and testing data.\"\"\"\n",
" \n",
" # TODO: Consider choosing a random subinterval. A sliding interval could be used to generate many\n",
" # training and testing dataset pairs.\n",
" fp_bound = 0.7 * len(series)\n",
" bound = int(math.floor(fp_bound))\n",
" \n",
" training_data = series.iloc[:bound]\n",
" testing_data = series.iloc[bound:]\n",
" \n",
" assert(len(training_data) > 0)\n",
" assert(len(testing_data) > 0)\n",
" \n",
" return training_data, testing_data\n",
"\n",
"\n",
"def normalized(training, testing):\n",
" mean = training.mean()\n",
" std = training.std() # TODO: what should ddof be set to?\n",
" \n",
" training = (training - mean) / std\n",
" testing = (testing - mean) / std\n",
" \n",
" return training, testing\n",
"\n",
"\n",
"for data_name, raw_data_path in raw_data_paths.items():\n",
" data_frame = pandas.read_csv(raw_data_path, engine='python', skipfooter=2)\n",
" time_series = data_frame[data_frame.columns[1]]\n",
" \n",
" time_series = to_stationary(time_series)\n",
" training, testing = split_data(time_series)\n",
" \n",
" training, testing = normalized(training, testing)\n",
" \n",
" data[data_name]['training'] = training\n",
" data[data_name]['testing'] = testing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for value in data.values():\n",
" for s in ('training', 'testing'):\n",
" print(value[s].head())\n",
" print('Rows:', len(value[s]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}