Get Genbank Sequences
The code sample below shows how to retrieve sequences from GenBank by accession number.
Imports System
Imports System.Windows.Forms
Imports System.Drawing
Imports System.Threading
Imports System.Collections
Imports System.Diagnostics
Imports BioloMICS.BioCallback
Imports BioloMICS.SharedClasses
Imports System.Text
Imports System.Xml
Public Class MainForm
Inherits System.Windows.Forms.Form
'Your sub goes here
' Creates the main form: builds the controls, then pins the window on top.
Public Sub New()
    MyBase.New()
    ' The controls must exist before any form-level property is touched.
    Me.InitializeComponent()
    ' Keep this window above all other windows.
    Me.TopMost = True
End Sub
' Builds the form's user interface: creates the search button, the accession-number
' text box, the title label and the results grid with its five text columns, sets
' their properties, wires the Click and CellDoubleClick handlers, and adds the
' controls to the form.
' NOTE(review): this looks like form-designer output - confirm before hand-editing,
' and keep the statement order (BeginInit/SuspendLayout first, EndInit/ResumeLayout last).
Sub InitializeComponent
Me.btnSearch = New System.Windows.Forms.Button()
Me.txtGBNumberToSearch = New System.Windows.Forms.TextBox()
Me.lblTitle = New System.Windows.Forms.Label()
Me.dgvListOfResults = New System.Windows.Forms.DataGridView()
Me.Organism = New System.Windows.Forms.DataGridViewTextBoxColumn()
Me.Namer = New System.Windows.Forms.DataGridViewTextBoxColumn()
Me.MB = New System.Windows.Forms.DataGridViewTextBoxColumn()
Me.Status = New System.Windows.Forms.DataGridViewTextBoxColumn()
Me.Year = New System.Windows.Forms.DataGridViewTextBoxColumn()
' Property assignments below are bracketed by BeginInit/EndInit on the grid and
' SuspendLayout/ResumeLayout on the form.
CType(Me.dgvListOfResults,System.ComponentModel.ISupportInitialize).BeginInit
Me.SuspendLayout
'
'btnSearch
'
Me.btnSearch.Location = New System.Drawing.Point(197, 28)
Me.btnSearch.Name = "btnSearch"
Me.btnSearch.Size = New System.Drawing.Size(75, 23)
Me.btnSearch.TabIndex = 0
Me.btnSearch.Text = "Search"
Me.btnSearch.UseCompatibleTextRendering = true
Me.btnSearch.UseVisualStyleBackColor = true
' Clicking the button runs BtnSearchClick.
AddHandler Me.btnSearch.Click, AddressOf Me.BtnSearchClick
'
'txtGBNumberToSearch
'
Me.txtGBNumberToSearch.Location = New System.Drawing.Point(12, 31)
Me.txtGBNumberToSearch.Name = "txtGBNumberToSearch"
Me.txtGBNumberToSearch.Size = New System.Drawing.Size(178, 20)
Me.txtGBNumberToSearch.TabIndex = 1
' Pre-filled text; presumably a sample accession number - confirm.
Me.txtGBNumberToSearch.Text = "AF123456"
'
'lblTitle
'
Me.lblTitle.Location = New System.Drawing.Point(11, 15)
Me.lblTitle.Name = "lblTitle"
Me.lblTitle.Size = New System.Drawing.Size(178, 23)
Me.lblTitle.TabIndex = 2
Me.lblTitle.Text = "Accession number to search for:"
Me.lblTitle.UseCompatibleTextRendering = true
'
'dgvListOfResults
'
' The grid is read-only and users cannot add or delete rows.
Me.dgvListOfResults.AllowUserToAddRows = false
Me.dgvListOfResults.AllowUserToDeleteRows = false
' Anchored to all four edges of the form.
Me.dgvListOfResults.Anchor = CType((((System.Windows.Forms.AnchorStyles.Top Or System.Windows.Forms.AnchorStyles.Bottom) _
Or System.Windows.Forms.AnchorStyles.Left) _
Or System.Windows.Forms.AnchorStyles.Right),System.Windows.Forms.AnchorStyles)
Me.dgvListOfResults.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize
' Column order fixes the cell indexes used elsewhere:
' 0 = Organism, 1 = Namer, 2 = MB, 3 = Status, 4 = Year.
Me.dgvListOfResults.Columns.AddRange(New System.Windows.Forms.DataGridViewColumn() {Me.Organism, Me.Namer, Me.MB, Me.Status, Me.Year})
Me.dgvListOfResults.Location = New System.Drawing.Point(12, 57)
Me.dgvListOfResults.Name = "dgvListOfResults"
Me.dgvListOfResults.ReadOnly = true
Me.dgvListOfResults.Size = New System.Drawing.Size(701, 395)
Me.dgvListOfResults.TabIndex = 3
' Double-clicking a cell runs DgvListOfResultsDoubleClick.
AddHandler Me.dgvListOfResults.CellDoubleClick, AddressOf Me.DgvListOfResultsDoubleClick
'
'Organism
'
Me.Organism.HeaderText = "Organism"
Me.Organism.Name = "Organism"
Me.Organism.ReadOnly = true
Me.Organism.Width = 150
'
'Namer (shown to the user as "Accession number version")
'
Me.Namer.HeaderText = "Accession number version"
Me.Namer.Name = "Namer"
Me.Namer.ReadOnly = true
'
'MB (shown to the user as "Accession number")
'
Me.MB.HeaderText = "Accession number"
Me.MB.Name = "MB"
Me.MB.ReadOnly = true
'
'Status (shown to the user as "Sequence")
'
Me.Status.HeaderText = "Sequence"
' Maximum input length for this column is set to 1,000,000 characters.
Me.Status.MaxInputLength = 1000000
Me.Status.Name = "Status"
Me.Status.ReadOnly = true
Me.Status.Width = 200
'
'Year
'
Me.Year.HeaderText = "Year"
Me.Year.Name = "Year"
Me.Year.ReadOnly = true
'
'MainForm
'
' btnSearch is the form's accept button (the button activated by the Enter key).
Me.AcceptButton = Me.btnSearch
Me.ClientSize = New System.Drawing.Size(725, 464)
Me.Controls.Add(Me.dgvListOfResults)
Me.Controls.Add(Me.txtGBNumberToSearch)
Me.Controls.Add(Me.btnSearch)
Me.Controls.Add(Me.lblTitle)
Me.Name = "MainForm"
Me.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen
Me.Text = "MainForm"
CType(Me.dgvListOfResults,System.ComponentModel.ISupportInitialize).EndInit
Me.ResumeLayout(false)
Me.PerformLayout
End Sub
' Control fields; all are created and configured in InitializeComponent.
' Note that several grid-column field names do not describe what the column shows:
' the header texts assigned in InitializeComponent are given next to each one.
' Grid column 0, header "Organism".
Private Organism As System.Windows.Forms.DataGridViewTextBoxColumn
' Text box holding the accession number to search for.
Private txtGBNumberToSearch As System.Windows.Forms.TextBox
' Grid column 4, header "Year".
Private Year As System.Windows.Forms.DataGridViewTextBoxColumn
' Grid column 3, header "Sequence".
Private Status As System.Windows.Forms.DataGridViewTextBoxColumn
' Grid column 1, header "Accession number version".
Private Namer As System.Windows.Forms.DataGridViewTextBoxColumn
' Results grid.
Private dgvListOfResults As System.Windows.Forms.DataGridView
' Grid column 2, header "Accession number".
Private MB As System.Windows.Forms.DataGridViewTextBoxColumn
' Caption label above the text box.
Private lblTitle As System.Windows.Forms.Label
' "Search" button; also the form's accept button.
Private btnSearch As System.Windows.Forms.Button
<STAThread()> _
Public Shared Sub Main()
    ' Entry point: shows the main form modally and returns when it is closed.
    ' A form shown with ShowDialog is not disposed when it closes, so the Using
    ' block releases its window resources explicitly.
    Using frm As New MainForm()
        frm.ShowDialog()
    End Using
End Sub
' Looks up the accession number typed into txtGBNumberToSearch with the NCBI
' E-utilities efetch service and fills dgvListOfResults with one row per returned
' record that contains a sequence.
'
' Row layout: organism, accession version, locus name, sequence, last-update date.
' The last value is the full GBSeq_update-date text, shown in the "Year" column.
'
' Does nothing except clear the grid when the text box is blank.
' Throws System.Net.WebException when the request fails or the status is not 200,
' and System.Xml.XmlException when the response is not well-formed XML.
Public Sub GenbankQuery()
    ' Clear the results grid
    dgvListOfResults.Rows.Clear()

    ' Nothing to look up: do not send a request without an id
    Dim accessionQuery As String = txtGBNumberToSearch.Text.Trim()
    If accessionQuery.Length = 0 Then
        Return
    End If

    ' Escape the user's text so characters such as "&" or "#" cannot alter the query string.
    ' NCBI serves E-utilities over HTTPS.
    Dim requestUrl As String = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=" _
        & System.Uri.EscapeDataString(accessionQuery) & "&retmode=XML"

    ' Create a WebRequest to the remote site (webservice of Genbank)
    Dim request As System.Net.HttpWebRequest = _
        CType(System.Net.WebRequest.Create(requestUrl), System.Net.HttpWebRequest)
    ' NB! Use the following line ONLY if the website is protected
    'request.Credentials = New System.Net.NetworkCredential("username", "password")

    Dim document As New XmlDocument()
    ' The payload carries a DOCTYPE; do not resolve external DTDs/entities from a remote response
    document.XmlResolver = Nothing

    ' Using blocks close the connection and stream even when loading fails
    Using response As System.Net.HttpWebResponse = _
        CType(request.GetResponse(), System.Net.HttpWebResponse)
        ' Check if the response is OK (status code 200)
        If response.StatusCode <> System.Net.HttpStatusCode.OK Then
            Throw New System.Net.WebException( _
                "Could not retrieve document from the URL, response code: " _
                & CInt(response.StatusCode).ToString())
        End If
        Using responseStream As System.IO.Stream = response.GetResponseStream()
            document.Load(responseStream)
        End Using
    End Using

    ' Read each GBSeq record on its own so values can never leak from one record into the next
    For Each record As XmlNode In document.GetElementsByTagName("GBSeq")
        Dim sequenceNode As XmlNode = record.Item("GBSeq_sequence")
        ' As before, only records that carry a sequence produce a row
        If sequenceNode Is Nothing Then
            Continue For
        End If

        ' Cell order matches the grid columns:
        ' 0 organism, 1 accession version, 2 locus name, 3 sequence, 4 last-update date
        dgvListOfResults.Rows.Add( _
            ReadChildText(record, "GBSeq_organism"), _
            ReadChildText(record, "GBSeq_accession-version"), _
            ReadChildText(record, "GBSeq_locus"), _
            sequenceNode.InnerText, _
            ReadChildText(record, "GBSeq_update-date"))
    Next
End Sub

' Returns the text of the first direct child element of record named childName,
' or an empty string when the record has no such child.
Private Shared Function ReadChildText(record As XmlNode, childName As String) As String
    Dim child As XmlNode = record.Item(childName)
    If child Is Nothing Then
        Return ""
    End If
    Return child.InnerText
End Function
' Click handler for btnSearch: runs the GenBank query.
' Network and XML failures are reported in a message box instead of escaping the
' event handler as an unhandled exception.
Sub BtnSearchClick(sender As Object, e As EventArgs)
    Try
        GenbankQuery()
    Catch ex As System.Net.WebException
        ' Raised when the service is unreachable or rejects the request
        MessageBox.Show(Me, "The GenBank request failed: " & ex.Message, Me.Text, _
            MessageBoxButtons.OK, MessageBoxIcon.Error)
    Catch ex As XmlException
        ' Raised when the response is not well-formed XML
        MessageBox.Show(Me, "The GenBank response could not be read: " & ex.Message, Me.Text, _
            MessageBoxButtons.OK, MessageBoxIcon.Error)
    End Try
End Sub
' CellDoubleClick handler for dgvListOfResults: copies the efetch URL of the
' double-clicked record to the clipboard, tells the user, and closes the form.
' The URL is built from the row's second cell (the accession version).
Sub DgvListOfResultsDoubleClick(sender As Object, e As DataGridViewCellEventArgs)
    ' Double-clicking a column header reports RowIndex = -1; there is no record to copy
    If e.RowIndex < 0 Then
        Return
    End If

    Dim accession As String = Convert.ToString(dgvListOfResults.Rows(e.RowIndex).Cells(1).Value)
    ' A URL without an id is useless, so leave the clipboard alone and keep the form open
    If String.IsNullOrEmpty(accession) Then
        Return
    End If

    ' SetText replaces whatever the clipboard held, so no separate Clear is needed.
    ' The accession is escaped so it cannot alter the query string.
    Clipboard.SetText("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=" _
        & System.Uri.EscapeDataString(accession))
    ' Send a message to the end-user that everything is done
    Microsoft.VisualBasic.MsgBox("URL is now in your clipboard")
    Me.Close()
End Sub
End Class