<title>ISI Bangla Segmented Scene Character Database 2014</title>
<style>
<!--
 /* Font Definitions */
@font-face
	{font-family:"MS Mincho";
	panose-1:0 0 0 0 0 0 0 0 0 0;
	mso-font-alt:"ＭＳ 明朝";
	mso-font-charset:128;
	mso-generic-font-family:roman;
	mso-font-format:other;
	mso-font-pitch:fixed;
	mso-font-signature:1 134676480 16 0 131072 0;}
@font-face
	{font-family:"Arial Unicode MS";
	mso-font-charset:128;
	mso-generic-font-family:swiss;
	mso-font-pitch:variable;
	mso-font-signature:-1 -369098753 63 0 4129023 0;}
@font-face
	{font-family:"Copperplate Gothic Light";
	mso-font-charset:0;
	mso-generic-font-family:swiss;
	mso-font-pitch:variable;
	mso-font-signature:3 0 0 0 1 0;}
@font-face
	{font-family:"Tempus Sans ITC";
	mso-font-charset:0;
	mso-generic-font-family:decorative;
	mso-font-pitch:variable;
	mso-font-signature:3 0 0 0 1 0;}
@font-face
	{font-family:"\@Arial Unicode MS";
	mso-font-charset:128;
	mso-generic-font-family:swiss;
	mso-font-pitch:variable;
	mso-font-signature:-1 -369098753 63 0 4129023 0;}
@font-face
	{font-family:"TimesNewRoman\,Bold";
	panose-1:0 0 0 0 0 0 0 0 0 0;
	mso-font-charset:0;
	mso-generic-font-family:roman;
	mso-font-format:other;
	mso-font-pitch:auto;
	mso-font-signature:3 0 0 0 1 0;}
@font-face
	{font-family:BN-TTDurga;
	panose-1:4 0 0 0 0 0 0 0 0 0;
	mso-font-charset:0;
	mso-generic-font-family:decorative;
	mso-font-pitch:variable;
	mso-font-signature:3 0 0 0 1 0;}
@font-face
	{font-family:"\@MS Mincho";
	mso-font-charset:128;
	mso-generic-font-family:modern;
	mso-font-pitch:fixed;
	mso-font-signature:-1610612033 1757936891 16 0 131231 0;}
@font-face
	{font-family:TimesNewRoman;
	panose-1:0 0 0 0 0 0 0 0 0 0;
	mso-font-charset:0;
	mso-generic-font-family:roman;
	mso-font-format:other;
	mso-font-pitch:auto;
	mso-font-signature:3 0 0 0 1 0;}
 /* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
	{mso-style-parent:"";
	margin:0in;
	margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	font-size:10.0pt;
	font-family:"Times New Roman";
	mso-fareast-font-family:"Times New Roman";
	mso-ansi-language:EN-GB;}
h1
	{mso-style-next:Normal;
	margin:0in;
	margin-bottom:.0001pt;
	text-align:center;
	mso-pagination:widow-orphan;
	mso-outline-level:1;
	font-size:12.0pt;
	mso-bidi-font-size:10.0pt;
	font-family:"Copperplate Gothic Light";
	mso-fareast-font-family:"Arial Unicode MS";
	mso-bidi-font-family:"Arial Unicode MS";
	color:#758B8A;
	mso-font-kerning:0pt;
	mso-ansi-language:EN-GB;
	font-weight:normal;}
h2
	{mso-style-next:Normal;
	margin:0in;
	margin-bottom:.0001pt;
	text-align:center;
	mso-pagination:widow-orphan;
	mso-outline-level:2;
	font-size:12.0pt;
	mso-bidi-font-size:10.0pt;
	font-family:"Tempus Sans ITC";
	mso-fareast-font-family:"Arial Unicode MS";
	mso-bidi-font-family:"Arial Unicode MS";
	color:#006600;
	mso-ansi-language:EN-GB;
	font-weight:bold;
	mso-bidi-font-weight:normal;
	text-decoration:underline;
	text-underline:single;}
p.MsoBodyTextIndent3, li.MsoBodyTextIndent3, div.MsoBodyTextIndent3
	{mso-style-parent:Default;
	mso-style-next:Default;
	margin:0in;
	margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	mso-layout-grid-align:none;
	text-autospace:none;
	font-size:10.0pt;
	mso-bidi-font-size:12.0pt;
	font-family:TimesNewRoman;
	mso-fareast-font-family:"Times New Roman";
	mso-bidi-font-family:"Times New Roman";}
p.MsoBlockText, li.MsoBlockText, div.MsoBlockText
	{margin-top:0in;
	margin-right:147.75pt;
	margin-bottom:0in;
	margin-left:168.75pt;
	margin-bottom:.0001pt;
	text-align:justify;
	mso-pagination:widow-orphan;
	font-size:11.0pt;
	mso-bidi-font-size:10.0pt;
	font-family:"Times New Roman";
	mso-fareast-font-family:"Times New Roman";
	mso-ansi-language:EN-GB;}
a:link, span.MsoHyperlink
	{color:blue;
	text-decoration:underline;
	text-underline:single;}
a:visited, span.MsoHyperlinkFollowed
	{color:purple;
	text-decoration:underline;
	text-underline:single;}
p
	{margin:0in;
	margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	font-size:12.0pt;
	font-family:"Times New Roman";
	mso-fareast-font-family:"Times New Roman";}
p.Default, li.Default, div.Default
	{mso-style-name:Default;
	mso-style-parent:"";
	margin:0in;
	margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	mso-layout-grid-align:none;
	text-autospace:none;
	font-size:10.0pt;
	font-family:"TimesNewRoman\,Bold";
	mso-fareast-font-family:"Times New Roman";
	mso-bidi-font-family:"Times New Roman";}
p.ArticleTitle, li.ArticleTitle, div.ArticleTitle
	{mso-style-name:"Article Title";
	margin-top:3.0pt;
	margin-right:0in;
	margin-bottom:0in;
	margin-left:.25in;
	margin-bottom:.0001pt;
	mso-pagination:widow-orphan;
	font-size:10.5pt;
	mso-bidi-font-size:10.0pt;
	font-family:"Times New Roman";
	mso-fareast-font-family:"Times New Roman";}
p.References, li.References, div.References
	{mso-style-name:References;
	margin:0in;
	margin-bottom:.0001pt;
	text-align:justify;
	mso-pagination:widow-orphan;
	font-size:9.0pt;
	mso-bidi-font-size:10.0pt;
	font-family:"Times New Roman";
	mso-fareast-font-family:"Times New Roman";}
@page Section1
	{size:595.3pt 841.9pt;
	margin:50.45pt 177.75pt 50.45pt .5in;
	mso-header-margin:.5in;
	mso-footer-margin:.5in;
	mso-gutter-margin:.5in;
	mso-paper-source:0;}
div.Section1
	{page:Section1;}
-->
</style>
</head>

<body lang=EN-US link=blue vlink=purple style='tab-interval:.5in'>

<div class=Section1>

<p class=MsoNormal style='margin-top:0in;margin-right:29.25pt;margin-bottom:
0in;margin-left:33.75pt;margin-bottom:.0001pt'><span lang=EN-GB>&nbsp;</span></p>

<h1 style='margin-top:0in;margin-right:29.25pt;margin-bottom:0in;margin-left:
33.75pt;margin-bottom:.0001pt'><b><span lang=EN-GB style='font-size:22.0pt;
mso-bidi-font-size:14.0pt;color:#006600'> ISI Bangla Scene Character Database (Version 2014)</span></b><span lang=EN-GB style='font-size:22.0pt;mso-bidi-font-size:
10.0pt;color:#006600'><o:p></o:p></span></h1>
<p class=Default style='margin-top:0in;margin-right:29.25pt;margin-bottom:0in;
margin-left:33.75pt;margin-bottom:.0001pt'><span style='mso-bidi-font-size:
12.0pt'><![if !supportEmptyParas]>&nbsp;<![endif]><o:p></o:p></span></p>

<p class=MsoNormal style='margin-top:0in;margin-right:29.25pt;margin-bottom:
0in;margin-left:33.75pt;margin-bottom:.0001pt'><span lang=EN-GB>&nbsp;</span></p>

<p style='margin-top:0in;margin-right:29.25pt;margin-bottom:0in;margin-left:
33.75pt;margin-bottom:.0001pt;text-align:justify'><span style='mso-bidi-font-size:
10.0pt'>

Bangla script is used to write Bangla and a few other languages of the eastern part of South Asia such as Assamese and Manipuri. This script holds official status in the two neighboring countries Bangladesh and India, and it is the 6th most popularly used script in the world. Its alphabet set has several diacritics and a large number of conjunct characters in addition to 50 basic characters, which include 11 vowels and 39 consonants. To meet the requirement of a standard dataset of scene characters of Bangla script for planned research works on its scene text recognition, one such dataset of Bangla characters or their parts has recently been developed at the Computer Vision and Pattern Recognition Unit of the Indian Statistical Institute, Kolkata. Its samples have been extracted from 260 outdoor scene images captured at different times from the streets, lanes and by-lanes of the state of West Bengal of India using a variety of digital cameras. Since the occurrence frequency of several Bangla characters in real-life texts is very low, we added several artificially created samples of these characters with the help of Microsoft PowerPoint software. <a
href="SmallSampleSetSegmentedSceneCharacters.zip"><b>A small subset of this sample database may be downloaded by clicking here</b></a> (we hope to release the entire segmented character database soon). The file name of each real sample is as follows: &lt;Unicode of parent word&gt;_&lt;Graphical transliteration of parent word&gt;_&lt;File name of source scene image&gt;_&lt;Left column number of the character in its parent word&gt;_&lt;Right column number of the character in its parent word&gt;.jpg, while the file name of each artificial sample is as follows: &lt;Character class number&gt;_&lt;Graphical transliteration of the character&gt;_&lt;Sample sequence number&gt;.jpg <o:p></o:p></span></p>

<br>

<p style='margin-top:0in;margin-right:29.25pt;margin-bottom:0in;margin-left:
33.75pt;margin-bottom:.0001pt;text-align:justify'><span style='mso-bidi-font-size:
10.0pt'>
Since a piece of Bangla text has three distinct regions, namely the upper, middle and lower regions, and since this script has a large character set, a common approach to developing an automatic recognition system for this script is to segment each line of Bangla text into the three horizontal regions and use a distinct recognizer for each of these three regions. Thus, in the present database we provide samples of Bangla characters or their segmented parts belonging to each of the three regions, as can be seen in the figure below. <o:p></o:p></span></p> 

<br>

<p style='margin-top:0in;margin-right:29.25pt;margin-bottom:0in;margin-left:
33.75pt;margin-bottom:.0001pt;text-align:center'><span style='mso-bidi-font-size:
10.0pt'>

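<img border=1 width=501 height=234 src="SceneCharSampleTile.jpg" alt="Samples of Bangla scene characters and their segmented parts from the upper, middle and lower regions"> <o:p></o:p></span></p> 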


<p class=MsoNormal style='margin-top:0in;margin-right:29.25pt;margin-bottom:
0in;margin-left:33.75pt;margin-bottom:.0001pt'><span lang=EN-GB>&nbsp;</span></p>
<p class=MsoNormal style='margin-top:0in;margin-right:29.25pt;margin-bottom:
0in;margin-left:37.5pt;margin-bottom:.0001pt;text-align:justify'><span
lang=EN-GB style='font-size:12.0pt'><b> Ref:-</b> </span><span lang=EN-GB style='font-size:12.0pt;mso-bidi-font-size:10.0pt'>S. Tian, U. Bhattacharya, S. Lu, B. Su, Q. Wang, X. Wei, Y. Lu and C. L. Tan, Multilingual Scene Character Recognition with Co-occurrence of Histogram of Oriented Gradients, <i> Pattern Recognition </i> (Online available).
</span></p>
<br>



<h3 class=MsoNormal align=center style='text-align:center'> <a href="ISI_Bengali_Character_Data_AppForm.pdf"> Application form for obtaining "ISI BENGALI_CHARACTER_DATASET" </a></h3>

<br>

<p class=MsoNormal align=center style='margin-top:0in;margin-right:29.25pt;
margin-bottom:0in;margin-left:33.75pt;margin-bottom:.0001pt;text-align:center'><span
lang=EN-GB><a href="http://www.isical.ac.in/~ujjwal"><span style='text-decoration:
none;text-underline:none'><U1:SHAPETYPE id="_x0000_t75" stroked="f" filled="f" path="m@4@5l@4@11@9@11@9@5xe" o:preferrelative="t" o:spt="75" coordsize="21600,21600"><U1:STROKE joinstyle="miter"/><U1:FORMULAS><U1:F eqn="if lineDrawn pixelLineWidth 0"/><U1:F eqn="sum @0 1 0"/><U1:F eqn="sum 0 0 @1"/><U1:F eqn="prod @2 1 2"/><U1:F eqn="prod @3 21600 pixelWidth"/><U1:F eqn="prod @3 21600 pixelHeight"/><U1:F eqn="sum @0 0 1"/><U1:F eqn="prod @6 1 2"/><U1:F eqn="prod @7 21600 pixelWidth"/><U1:F eqn="sum @8 21600 0"/><U1:F eqn="prod @7 21600 pixelHeight"/><U1:F eqn="sum @10 21600 0"/></U1:FORMULAS><U1:PATH o:connecttype="rect" gradientshapeok="t" o:extrusionok="f"/><o:lock aspectratio="t" u1:ext="edit"></o:lock></U1:SHAPETYPE><U1:SHAPE id="_x0000_i1025" style="WIDTH: 64.5pt; HEIGHT: 54pt" type="#_x0000_t75"><U1:IMAGEDATA o:title="bs00580_" src="./database_files/image001.wmz"/></U1:SHAPE><img
border=0 width=86 height=72 id="_x0000_i1025" src="Image/image002.gif" alt="Back to Ujjwal's main page"
u1:shapes="_x0000_i1025"></span></a></span></p>

<p class=MsoNormal align=center style='margin-top:0in;margin-right:29.25pt;
margin-bottom:0in;margin-left:33.75pt;margin-bottom:.0001pt;text-align:center'><b><span
lang=EN-GB><a href="http://www.isical.ac.in/~ujjwal">Back to Ujjwal's main page</a></span></b></p>

</div>

</body>