{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "[this doc on github](https://github.com/dotnet/interactive/tree/master/samples/notebooks/csharp/Samples)\n", "\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "#r \"nuget:Microsoft.Data.Analysis,0.2.0\"\n", "using Microsoft.Data.Analysis;" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "// PrimitiveDataFrameColumn is generic: the type argument fixes the element type of each column.\n", "PrimitiveDataFrameColumn<DateTime> dateTimes = new PrimitiveDataFrameColumn<DateTime>(\"DateTimes\"); // Default length is 0.\n", "PrimitiveDataFrameColumn<int> ints = new PrimitiveDataFrameColumn<int>(\"Ints\", 3); // Makes a column of length 3. Filled with nulls initially\n", "StringDataFrameColumn strings = new StringDataFrameColumn(\"Strings\", 3); // Makes a column of length 3. Filled with nulls initially" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "// Append 3 values to dateTimes\n", "dateTimes.Append(DateTime.Parse(\"2019/01/01\"));\n", "dateTimes.Append(DateTime.Parse(\"2019/01/01\"));\n", "dateTimes.Append(DateTime.Parse(\"2019/01/02\"));" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "DataFrame df = new DataFrame(dateTimes, ints, strings); // This will throw if the columns are of different lengths" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
<null>
<null>
1
2019-01-01 00:00:00Z
<null>
<null>
2
2019-01-02 00:00:00Z
<null>
<null>
" }, "metadata": {}, "execution_count": 39 } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "using Microsoft.AspNetCore.Html;\n", "// Register a text/html formatter for DataFrame: a header row plus at most `take` data rows.\n", "Formatter<DataFrame>.Register((df, writer) =>\n", "{\n", "    var headers = new List<IHtmlContent>();\n", "    headers.Add(th(i(\"index\")));\n", "    headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));\n", "    var rows = new List<List<IHtmlContent>>();\n", "    var take = 20; // upper bound on the number of rows rendered\n", "    // The index is named rowIndex so it does not reuse the name of the i(...) helper called above.\n", "    for (var rowIndex = 0; rowIndex < Math.Min(take, df.Rows.Count); rowIndex++)\n", "    {\n", "        var cells = new List<IHtmlContent>();\n", "        cells.Add(td(rowIndex)); // leading cell holds the row index\n", "        foreach (var obj in df.Rows[rowIndex])\n", "        {\n", "            cells.Add(td(obj));\n", "        }\n", "        rows.Add(cells);\n", "    }\n", "\n", "    var t = table(\n", "        thead(\n", "            headers),\n", "        tbody(\n", "            rows.Select(\n", "                r => tr(r))));\n", "\n", "    writer.Write(t);\n", "}, \"text/html\");" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
<null>
<null>
1
2019-01-01 00:00:00Z
<null>
<null>
2
2019-01-02 00:00:00Z
<null>
<null>
" }, "metadata": {}, "execution_count": 41 } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
10
<null>
1
2019-01-01 00:00:00Z
<null>
<null>
2
2019-01-02 00:00:00Z
<null>
<null>
" }, "metadata": {}, "execution_count": 42 } ], "source": [ "// To change a value directly through df\n", "df[0, 1] = 10; // 0 is the rowIndex, and 1 is the columnIndex. This sets the 0th value in the Ints column to 10\n", "df" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
10
<null>
1
2019-01-01 00:00:00Z
100
Foo!
2
2019-01-02 00:00:00Z
<null>
<null>
" }, "metadata": {}, "execution_count": 43 } ], "source": [ "// Modify ints and strings columns by indexing\n", "ints[1] = 100;\n", "strings[1] = \"Foo!\";\n", "df" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexInfoDateTimesIntsStrings
0
DataTypeSystem.DateTimeSystem.Int32System.String
1
Length (excluding null values)323
" }, "metadata": {}, "execution_count": 44 } ], "source": [ "// Indexing can throw when types don't match.\n", "// ints[1] = \"this will throw because I am a string\"; \n", "// Info can be used to figure out the type of data in a column. \n", "df.Info()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
15
<null>
1
2019-01-01 00:00:00Z
105
Foo!
2
2019-01-02 00:00:00Z
<null>
<null>
" }, "metadata": {}, "execution_count": 45 } ], "source": [ "// Add 5 to ints through the DataFrame\n", "df[\"Ints\"].Add(5, inPlace: true);\n", "df" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
300
<null>
1
2019-01-01 00:00:00Z
2100
Foo!
2
2019-01-02 00:00:00Z
<null>
<null>
" }, "metadata": {}, "execution_count": 46 } ], "source": [ "// We can also use binary operators. Binary operators produce a copy, so assign it back to our Ints column \n", "df[\"Ints\"] = (ints / 5) * 100;\n", "df" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
300
Bar
1
2019-01-01 00:00:00Z
2100
Foo!
2
2019-01-02 00:00:00Z
-1
Bar
" }, "metadata": {}, "execution_count": 47 } ], "source": [ "// Fill nulls in our columns, if any. Ints[2], Strings[0] and Strings[2] are null\n", "df[\"Ints\"].FillNulls(-1, inPlace: true);\n", "df[\"Strings\"].FillNulls(\"Bar\", inPlace: true);\n", "df" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
2019-01-01 00:00:00Z
300
Bar
" }, "metadata": {}, "execution_count": 48 } ], "source": [ "// To inspect the first row\n", "DataFrameRow row0 = df.Rows[0];\n", "row0" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "using Microsoft.AspNetCore.Html;\n", "// Register a text/html formatter for DataFrameRow: one table row with one cell per value.\n", "Formatter<DataFrameRow>.Register((dataFrameRow, writer) =>\n", "{\n", "    var cells = new List<IHtmlContent>();\n", "    foreach (var obj in dataFrameRow)\n", "    {\n", "        cells.Add(td(obj));\n", "    }\n", "\n", "    // The td cells are wrapped in a tr so they are not direct children of tbody.\n", "    var t = table(\n", "        tbody(\n", "            tr(cells)));\n", "\n", "    writer.Write(t);\n", "}, \"text/html\");" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
2019-01-01 00:00:00Z
300
Bar
" }, "metadata": {}, "execution_count": 50 } ], "source": [ "row0" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexvalue
0
True
1
False
2
True
" }, "metadata": {}, "execution_count": 51 } ], "source": [ "// Filter rows based on equality\n", "// ElementwiseEquals compares each value in Strings with \"Bar\"; the result is stored as a column of bool.\n", "PrimitiveDataFrameColumn<bool> boolFilter = df[\"Strings\"].ElementwiseEquals(\"Bar\");\n", "boolFilter" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
300
Bar
1
2019-01-02 00:00:00Z
-1
Bar
" }, "metadata": {}, "execution_count": 52 } ], "source": [ "DataFrame filtered = df.Filter(boolFilter);\n", "filtered" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-02 00:00:00Z
-1
Bar
1
2019-01-01 00:00:00Z
300
Bar
2
2019-01-01 00:00:00Z
2100
Foo!
" }, "metadata": {}, "execution_count": 53 } ], "source": [ "// Sort our dataframe using the Ints column\n", "DataFrame sorted = df.Sort(\"Ints\");\n", "sorted" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesIntsStrings
0
2019-01-01 00:00:00Z
2
2
1
2019-01-02 00:00:00Z
1
1
" }, "metadata": {}, "execution_count": 54 } ], "source": [ "// GroupBy \n", "GroupBy groupBy = df.GroupBy(\"DateTimes\");\n", "// Count of values in each group\n", "DataFrame groupCounts = groupBy.Count();\n", "groupCounts" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "
indexDateTimesInts
0
2019-01-01 00:00:00Z
2400
1
2019-01-02 00:00:00Z
-1
" }, "metadata": {}, "execution_count": 55 } ], "source": [ "// Alternatively find the sum of the values in each group in Ints\n", "DataFrame intsGroupSum = groupBy.Sum(\"Ints\");\n", "intsGroupSum" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "using XPlot.Plotly;\n", "using System.Linq;" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "#r \"nuget:MathNet.Numerics,4.9.0\"" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "using MathNet.Numerics.Distributions;\n", "double mean = 0;\n", "double stdDev = 0.1;\n", "\n", "MathNet.Numerics.Distributions.Normal normalDist = new Normal(mean, stdDev);" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/html": "
\r\n\r\n" }, "metadata": {} } ], "source": [ "// Take 1000 samples from normalDist and plot them as a histogram with nbinsx = 30.\n", "PrimitiveDataFrameColumn<double> doubles = new PrimitiveDataFrameColumn<double>(\"Normal Distribution\", normalDist.Samples().Take(1000));\n", "display(Chart.Plot(\n", "    new Graph.Histogram()\n", "    {\n", "        x = doubles,\n", "        nbinsx = 30\n", "    }\n", "));\n" ] } ], "metadata": { "kernelspec": { "display_name": ".NET (C#)", "language": "C#", "name": ".net-csharp" }, "language_info": { "file_extension": ".cs", "mimetype": "text/x-csharp", "name": "C#", "pygments_lexer": "csharp", "version": "8.0" } }, "nbformat": 4, "nbformat_minor": 2 }