RapidMiner - k-Medoids. Identify the medoid - cluster-analysis

I am using RapidMiner to look at the results and performance of the k-Medoids algorithm. I was able to build the process and see the output, but I would also like to see, inside each cluster that was created, the central point (the medoid that was chosen). Is there any way to do that?
Img1: Design
Img2: Result

You can use the Extract Cluster Prototypes operator to create an example set corresponding to the cluster centroids. You can then use the Append operator to join it to the original data although there is a bit of work to do to get the example sets compatible.
Here's an example
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Runs k-Medoids (k = 3) on the Iris sample data, extracts the cluster prototypes from
  the cluster model and appends them to the clustered examples. Each input of Append
  first gets a generated "type" attribute ("centroid" or "data") so that the rows can
  be told apart in the merged set.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="k_medoids" compatibility="7.0.001" expanded="true" height="82" name="Clustering" width="90" x="179" y="136">
        <parameter key="k" value="3"/>
      </operator>
      <!-- Tags the clustered examples. The expression is a quoted string literal, so the
           quotes have to be written as &quot; inside the attribute value. -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="136">
        <list key="function_descriptions">
          <parameter key="type" value="&quot;data&quot;"/>
        </list>
      </operator>
      <!-- Inverted subset selection over label|id (special attributes included), applied
           to the clustered examples before they are appended to the prototypes. -->
      <operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="label|id"/>
        <parameter key="invert_selection" value="true"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="extract_prototypes" compatibility="7.0.001" expanded="true" height="82" name="Extract Cluster Prototypes" width="90" x="313" y="34"/>
      <!-- Tags the extracted prototypes; same quoting rule as above. -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="447" y="34">
        <list key="function_descriptions">
          <parameter key="type" value="&quot;centroid&quot;"/>
        </list>
      </operator>
      <!-- A breakpoint is configured before Append (breakpoints="before"). -->
      <operator activated="true" breakpoints="before" class="append" compatibility="7.0.001" expanded="true" height="103" name="Append" width="90" x="581" y="85"/>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Clustering" to_port="example set"/>
      <connect from_op="Clustering" from_port="cluster model" to_op="Extract Cluster Prototypes" to_port="model"/>
      <connect from_op="Clustering" from_port="clustered set" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Append" to_port="example set 2"/>
      <connect from_op="Extract Cluster Prototypes" from_port="example set" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
The type attribute can be used as the colour in a plot so you can see the centroids.

Related

Powershell Make a Dymo LabelWriter 450 Label

I made a label and then opened it in notepad and got the following.
<?xml version="1.0" encoding="utf-8"?>
<!-- Dymo die-cut label (paper 30336, landscape) containing a single text object. -->
<DieCutLabel Version="8.0" Units="twips" MediaType="Default">
  <PaperOrientation>Landscape</PaperOrientation>
  <Id>Small30336</Id>
  <IsOutlined>false</IsOutlined>
  <PaperName>30336 1 in x 2-1/8 in</PaperName>
  <DrawCommands>
    <RoundRectangle X="0" Y="0" Width="1440" Height="3060" Rx="180" Ry="180" />
  </DrawCommands>
  <ObjectInfo>
    <TextObject>
      <Name>TEXT</Name>
      <ForeColor Alpha="255" Red="0" Green="0" Blue="0" />
      <BackColor Alpha="0" Red="255" Green="255" Blue="255" />
      <LinkedObjectName />
      <Rotation>Rotation0</Rotation>
      <IsMirrored>False</IsMirrored>
      <IsVariable>False</IsVariable>
      <GroupID>-1</GroupID>
      <IsOutlined>False</IsOutlined>
      <HorizontalAlignment>Center</HorizontalAlignment>
      <VerticalAlignment>Top</VerticalAlignment>
      <TextFitMode>ShrinkToFit</TextFitMode>
      <UseFullFontHeight>True</UseFullFontHeight>
      <Verticalized>False</Verticalized>
      <StyledText>
        <Element>
          <!-- xml:space="preserve": the line break is part of the text, so the second
               line must start at column 0 and must not be re-indented. -->
          <String xml:space="preserve">Hello
World</String>
          <Attributes>
            <Font Family="Arial" Size="12" Bold="False" Italic="False" Underline="False" Strikeout="False" />
            <ForeColor Alpha="255" Red="0" Green="0" Blue="0" HueScale="100" />
          </Attributes>
        </Element>
      </StyledText>
    </TextObject>
    <Bounds X="130" Y="147" Width="2798" Height="1188" />
  </ObjectInfo>
  <!-- The root element was left unclosed in the pasted file; it is closed here. -->
</DieCutLabel>
so what I attempted to do is use powershell to make the same label and print it automatically per below
# Builds a Dymo label file with two lines of text and sends it to PrintLabel.exe.

# Text for the first and second line of the label.
$Word = "Hello"
$Word1 = "World"

# Expandable here-string: it must open with @" at the end of a line and close with "@
# at the start of a line. ($Word and $Word1 are expanded inside it.)
# The XML declaration has to be the very first line of the generated file.
$label = @"
<?xml version="1.0" encoding="utf-8"?>
<DieCutLabel Version="8.0" Units="twips" MediaType="Default">
  <PaperOrientation>Landscape</PaperOrientation>
  <Id>Small30336</Id>
  <IsOutlined>false</IsOutlined>
  <PaperName>30336 1 in x 2-1/8 in</PaperName>
  <DrawCommands>
    <RoundRectangle X="0" Y="0" Width="1440" Height="3060" Rx="180" Ry="180" />
  </DrawCommands>
  <ObjectInfo>
    <TextObject>
      <Name>TEXT</Name>
      <ForeColor Alpha="255" Red="0" Green="0" Blue="0" />
      <BackColor Alpha="0" Red="255" Green="255" Blue="255" />
      <LinkedObjectName />
      <Rotation>Rotation0</Rotation>
      <IsMirrored>False</IsMirrored>
      <IsVariable>False</IsVariable>
      <GroupID>-1</GroupID>
      <IsOutlined>False</IsOutlined>
      <HorizontalAlignment>Center</HorizontalAlignment>
      <VerticalAlignment>Top</VerticalAlignment>
      <TextFitMode>ShrinkToFit</TextFitMode>
      <UseFullFontHeight>True</UseFullFontHeight>
      <Verticalized>False</Verticalized>
      <StyledText>
        <Element>
          <String xml:space="preserve">$Word
$Word1</String>
          <Attributes>
            <Font Family="Arial" Size="12" Bold="False" Italic="False" Underline="False" Strikeout="False" />
            <ForeColor Alpha="255" Red="0" Green="0" Blue="0" HueScale="100" />
          </Attributes>
        </Element>
      </StyledText>
    </TextObject>
    <Bounds X="130" Y="147" Width="2798" Height="1188" />
  </ObjectInfo>
</DieCutLabel>
"@

# Write the file as UTF-8 so that it matches the encoding declared in the XML prolog;
# the ">" redirection in Windows PowerShell writes UTF-16 instead.
$label | Set-Content -Path Save.label -Encoding UTF8

& ".\PrintLabel.exe" Save.label
Is there a way to make this work, or is there a better way to do this?
It does print, but it prints the default label from the examples and not the one I want, which should say "Hello World" on two different lines.
I tried using full paths for each file, but it does the same thing.

Building high-precision classifier with RapidMiner

Suppose I need to build a classifier with high precision and low recall. Which operator should I use in RapidMiner? For example, the Apply Model operator doesn't seem to support any adjustment of the threshold. It's also unclear how the same Apply Model operator works with models of different types: neural networks, SVM, etc.
You could use the Select Recall operator. This finds the threshold needed to get the recall you choose.
The input to this operator is a labelled example set containing confidences, the output is a threshold that is applied to the labelled data using the Apply Threshold operator.
The Apply Model operator requires an example set and a model as input.
I've made a simple example that illustrates all of this.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Trains Naive Bayes on the Sonar sample data and applies the model to the same data.
  The labelled result is duplicated: one copy goes through Select Recall and
  Apply Threshold before Performance, the other copy goes through Materialize Data
  straight to Performance (2), so both performance results can be compared.
-->
<process version="7.2.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.2.003" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.2.003" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Sonar"/>
      </operator>
      <operator activated="true" class="naive_bayes" compatibility="7.2.003" expanded="true" height="82" name="Naive Bayes" width="90" x="45" y="289"/>
      <operator activated="true" class="apply_model" compatibility="7.2.003" expanded="true" height="82" name="Apply Model" width="90" x="179" y="289">
        <list key="application_parameters"/>
      </operator>
      <!-- Duplicates the labelled data for the two branches. -->
      <operator activated="true" class="multiply" compatibility="7.2.003" expanded="true" height="103" name="Multiply" width="90" x="246" y="136"/>
      <operator activated="true" class="materialize_data" compatibility="7.2.003" expanded="true" height="82" name="Materialize Data" width="90" x="380" y="136"/>
      <!-- Threshold branch: min_recall is set to 0.1 here. -->
      <operator activated="true" class="select_recall" compatibility="7.2.003" expanded="true" height="82" name="Select Recall" width="90" x="313" y="34">
        <parameter key="min_recall" value="0.1"/>
      </operator>
      <operator activated="true" class="apply_threshold" compatibility="7.2.003" expanded="true" height="82" name="Apply Threshold" width="90" x="447" y="34"/>
      <operator activated="true" class="performance_binominal_classification" compatibility="7.2.003" expanded="true" height="82" name="Performance" width="90" x="581" y="34"/>
      <operator activated="true" class="performance_binominal_classification" compatibility="7.2.003" expanded="true" height="82" name="Performance (2)" width="90" x="581" y="136"/>
      <!-- Training and model application. -->
      <connect from_op="Retrieve Sonar" from_port="output" to_op="Naive Bayes" to_port="training set"/>
      <connect from_op="Naive Bayes" from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_op="Naive Bayes" from_port="exampleSet" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Multiply" to_port="input"/>
      <!-- Split into the threshold branch (output 1) and the plain branch (output 2). -->
      <connect from_op="Multiply" from_port="output 1" to_op="Select Recall" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Materialize Data" to_port="example set input"/>
      <connect from_op="Materialize Data" from_port="example set output" to_op="Performance (2)" to_port="labelled data"/>
      <connect from_op="Select Recall" from_port="example set" to_op="Apply Threshold" to_port="example set"/>
      <connect from_op="Select Recall" from_port="threshold" to_op="Apply Threshold" to_port="threshold"/>
      <connect from_op="Apply Threshold" from_port="example set" to_op="Performance" to_port="labelled data"/>
      <!-- Results. -->
      <connect from_op="Performance" from_port="performance" to_port="result 1"/>
      <connect from_op="Performance (2)" from_port="performance" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Hope that helps as a start.

How to add XY-line to bar chart creating a multitype chart with another axis?

Just wondering if there would be a possibility to create such a Jasper report chart that would be a combination of many chart types?
In my case I would like to combine Bar Chart and XY Line on two different axis. I already have a following kind of bar chart ready:
Ideally I would be able to add one XY line to the same chart as well, which would present the cumulative value of the sum of all hour types for each month. See the picture at the end. I have a feeling that this is not possible, and maybe I should just create a separate chart for the XY line?
This is follow up on this question How to populate chart data with JavaBeans collection dataSet? (see my answer to understand further details if you like a simple bar chart and use the series expression dynamically)
To achieve a multi axis chart you should use <multiAxisChart> and it will become a little bit more complicated. We can not use the dynamic seriesExpression anymore and need to define each series manually, therefore I will use your original bean but still in a separated datasource.
Java bean
/**
 * Bean holding the working hours recorded for one month, split by hour type,
 * together with the cumulative total.
 */
public class WorkingHours {

    // Numeric fields start at their Java default value (0) until the constructor assigns them.
    private int month;
    private double hoursNormal;
    private double hoursTravel;
    private double hoursOvertime;
    private double hoursTotalCumulative;

    /**
     * Creates a fully populated bean.
     *
     * @param month                the month this entry belongs to
     * @param hoursNormal          normal hours
     * @param hoursTravel          travel hours
     * @param hoursOvertime        overtime hours
     * @param hoursTotalCumulative cumulative total of hours
     */
    public WorkingHours(int month, double hoursNormal, double hoursTravel, double hoursOvertime, double hoursTotalCumulative) {
        this.month = month;
        this.hoursNormal = hoursNormal;
        this.hoursTravel = hoursTravel;
        this.hoursOvertime = hoursOvertime;
        this.hoursTotalCumulative = hoursTotalCumulative;
    }

    //getter and setter
}
Fill with data (use your logic) and pass as java.util.List in parameter
// Sample rows; the constructor arguments are month, normal, travel, overtime and cumulative hours.
List<WorkingHours> list = new ArrayList<>();
list.add(new WorkingHours(1, 2.3, 1.2, 2.1, 4.1));
list.add(new WorkingHours(2, 5.3, 2.2, 3, 9.1));
list.add(new WorkingHours(3, 3.1, 0.5, 2.0, 20.5));

// Report parameters: the list is passed under the key "CHART_DATA".
Map<String, Object> paramMap = new HashMap<>();
paramMap.put("CHART_DATA", list);
The report (jrxml)
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Report "working_hours": a multi-axis chart in the summary band that combines a line
  chart (series "Total Cum") with a bar chart (series "Normal", "Travel", "Overtime").
  Both charts read the same bean collection passed in the CHART_DATA parameter.
-->
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports"
              xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
              xsi:schemaLocation="http://jasperreports.sourceforge.net/jasperreports http://jasperreports.sourceforge.net/xsd/jasperreport.xsd"
              name="working_hours"
              pageWidth="595"
              pageHeight="842"
              whenNoDataType="AllSectionsNoDetail"
              columnWidth="555"
              leftMargin="20"
              rightMargin="20"
              topMargin="20"
              bottomMargin="20"
              uuid="1a12c021-57e2-4482-a273-56cbd3f78a17">
  <property name="ireport.zoom" value="1.0"/>
  <property name="ireport.x" value="0"/>
  <property name="ireport.y" value="0"/>
  <!-- Fields of the chart dataset; the names match the fields of the WorkingHours bean. -->
  <subDataset name="chartDataSet" uuid="119b7f0e-01ef-4e2b-b628-d76f51e83768">
    <field name="month" class="java.lang.Integer"/>
    <field name="hoursNormal" class="java.lang.Double"/>
    <field name="hoursTravel" class="java.lang.Double"/>
    <field name="hoursOvertime" class="java.lang.Double"/>
    <field name="hoursTotalCumulative" class="java.lang.Double"/>
  </subDataset>
  <parameter name="CHART_DATA" class="java.util.List" isForPrompting="false"/>
  <summary>
    <band height="282" splitType="Stretch">
      <multiAxisChart>
        <chart evaluationTime="Report">
          <reportElement x="62" y="17" width="419" height="235" uuid="8a16251e-8c1a-4384-8487-9be8f6c274e5"/>
          <chartTitle/>
          <chartSubtitle/>
          <chartLegend position="Right"/>
        </chart>
        <multiAxisPlot>
          <plot/>
          <!-- First axis (position rightOrBottom): line chart of the cumulative total. -->
          <axis position="rightOrBottom">
            <lineChart>
              <chart evaluationTime="Report">
                <reportElement positionType="Float" x="0" y="25" width="270" height="175" backcolor="#FFFFFF" uuid="4a755d76-1350-4921-a0be-20ae9e485e12"/>
                <chartTitle color="#000000"/>
                <chartSubtitle color="#000000"/>
                <chartLegend textColor="#000000" backgroundColor="#FFFFFF" position="Right"/>
              </chart>
              <categoryDataset>
                <dataset>
                  <datasetRun subDataset="chartDataSet" uuid="abec2dce-b670-4e84-b71f-469d954dbcb5">
                    <dataSourceExpression><![CDATA[new net.sf.jasperreports.engine.data.JRBeanCollectionDataSource($P{CHART_DATA})]]></dataSourceExpression>
                  </datasetRun>
                </dataset>
                <categorySeries>
                  <seriesExpression><![CDATA["Total Cum"]]></seriesExpression>
                  <categoryExpression><![CDATA[$F{month}]]></categoryExpression>
                  <valueExpression><![CDATA[$F{hoursTotalCumulative}]]></valueExpression>
                </categorySeries>
              </categoryDataset>
              <linePlot isShowLines="true" isShowShapes="true">
                <plot>
                  <seriesColor seriesOrder="0" color="#9900CC"/>
                </plot>
                <categoryAxisFormat>
                  <axisFormat/>
                </categoryAxisFormat>
                <valueAxisLabelExpression><![CDATA["Total Cum"]]></valueAxisLabelExpression>
                <valueAxisFormat>
                  <axisFormat labelColor="#000000" tickLabelColor="#000000" tickLabelMask="#,##0" axisLineColor="#000000"/>
                </valueAxisFormat>
              </linePlot>
            </lineChart>
          </axis>
          <!-- Second axis (no position attribute): bar chart with one series per hour type,
               each series defined explicitly. -->
          <axis>
            <barChart>
              <chart evaluationTime="Report">
                <reportElement x="0" y="0" width="0" height="0" backcolor="#FFFFFF" uuid="723abd06-b593-422a-b679-043084525a8c"/>
                <chartTitle color="#000000"/>
                <chartSubtitle color="#000000"/>
                <chartLegend textColor="#000000" backgroundColor="#FFFFFF" position="Right"/>
              </chart>
              <categoryDataset>
                <dataset>
                  <datasetRun subDataset="chartDataSet" uuid="abec2dce-b670-4e84-b71f-469d954dbcb5">
                    <dataSourceExpression><![CDATA[new net.sf.jasperreports.engine.data.JRBeanCollectionDataSource($P{CHART_DATA})]]></dataSourceExpression>
                  </datasetRun>
                </dataset>
                <categorySeries>
                  <seriesExpression><![CDATA["Normal"]]></seriesExpression>
                  <categoryExpression><![CDATA[$F{month}]]></categoryExpression>
                  <valueExpression><![CDATA[$F{hoursNormal}]]></valueExpression>
                </categorySeries>
                <categorySeries>
                  <seriesExpression><![CDATA["Travel"]]></seriesExpression>
                  <categoryExpression><![CDATA[$F{month}]]></categoryExpression>
                  <valueExpression><![CDATA[$F{hoursTravel}]]></valueExpression>
                </categorySeries>
                <categorySeries>
                  <seriesExpression><![CDATA["Overtime"]]></seriesExpression>
                  <categoryExpression><![CDATA[$F{month}]]></categoryExpression>
                  <valueExpression><![CDATA[$F{hoursOvertime}]]></valueExpression>
                </categorySeries>
              </categoryDataset>
              <barPlot>
                <plot/>
                <itemLabel/>
                <categoryAxisFormat>
                  <axisFormat/>
                </categoryAxisFormat>
                <valueAxisLabelExpression><![CDATA["h"]]></valueAxisLabelExpression>
                <valueAxisFormat>
                  <axisFormat labelColor="#000000" tickLabelColor="#000000" tickLabelMask="#,##0" axisLineColor="#000000"/>
                </valueAxisFormat>
              </barPlot>
            </barChart>
          </axis>
        </multiAxisPlot>
      </multiAxisChart>
    </band>
  </summary>
</jasperReport>
Key points:
We use <multiAxisChart> with two <axis> elements: one contains the <lineChart>, the other the <barChart>. For the <barChart> we define every <categorySeries> separately.
Output
For more demos on chart see this: Jasper Reports Chart Samples

Change Jenkins text "build" to "Linux Build" or "Mac Build"

Can we be more precise of target build?
I would like to know whether we can customize the build-type text that Jenkins shows. Most projects use a table format instead.
.svg source
<?xml version="1.0"?>
<!-- 90x20 status badge: a grey left part labelled "build" and a green right part labelled "passing". -->
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="20">
  <!-- Vertical gradient laid over the whole badge. -->
  <linearGradient id="a" x2="0" y2="100%">
    <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
    <stop offset="1" stop-opacity=".1"/>
  </linearGradient>
  <!-- Background shapes: full-width grey, then the green part starting at x=37. -->
  <rect rx="3" width="90" height="20" fill="#555"/>
  <rect rx="3" x="37" width="53" height="20" fill="#4c1"/>
  <path fill="#4c1" d="M37 0h4v20h-4z"/>
  <rect rx="3" width="90" height="20" fill="url(#a)"/>
  <!-- Each label is drawn twice: a dark, semi-transparent copy one pixel lower, then the white text. -->
  <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
    <text x="19.5" y="15" fill="#010101" fill-opacity=".3">build</text>
    <text x="19.5" y="14">build</text>
    <text x="62.5" y="15" fill="#010101" fill-opacity=".3">passing</text>
    <text x="62.5" y="14">passing</text>
  </g>
</svg>

Validating Weka Multilayer Perceptron in Rapidminer

I was trying to implement the W-Multilayer Perceptron from the Weka Rapidminer plugin. When I run it in my dataset it takes around 1.5 hours to finish training a simple 5 layer perceptron.
However, although the perceptron itself seems to be working properly, when I put it inside the validation operator it gets stuck in the validation phase, consuming more and more memory. I left it running overnight and it has been there for 15 hours. To me this doesn't make sense, since after creating the model, applying it shouldn't take nearly so much time. Can anyone who understands how this works tell me what's happening?
The way I am using the operator is the following, in my scheme it's directly connected to a read database operator with only the set role operator between them.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Split validation around the Weka W-MultilayerPerceptron operator: the first inner
  process trains the model, the second applies it to the test set and measures
  performance.
-->
<process version="5.3.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.008" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="split_validation" compatibility="5.3.008" expanded="true" height="112" name="Validation (6)" width="90" x="45" y="120">
        <!-- Training subprocess. -->
        <process expanded="true">
          <!-- NOTE(review): breakpoints="after" is set on this operator; confirm whether the
               process is merely paused at that breakpoint once training has finished. -->
          <operator activated="true" breakpoints="after" class="weka:W-MultilayerPerceptron" compatibility="5.3.001" expanded="true" height="76" name="W-MultilayerPerceptron" width="90" x="69" y="30">
            <parameter key="N" value="100.0"/>
            <parameter key="S" value="30.0"/>
            <parameter key="H" value="5"/>
          </operator>
          <connect from_port="training" to_op="W-MultilayerPerceptron" to_port="training set"/>
          <connect from_op="W-MultilayerPerceptron" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess. -->
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.008" expanded="true" height="76" name="Apply Model (6)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.3.008" expanded="true" height="76" name="Performance (6)" width="90" x="147" y="30"/>
          <connect from_port="model" to_op="Apply Model (6)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (6)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (6)" from_port="labelled data" to_op="Performance (6)" to_port="labelled data"/>
          <connect from_op="Performance (6)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- No connections are defined at this level in the snippet; the validation
           operator's inputs and outputs are left unwired here. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>